From 8ef07bac89120e7d1e35b2ad61815f5b17384e59 Mon Sep 17 00:00:00 2001 From: Alfonso Martinez Date: Tue, 25 Oct 2022 17:40:54 +0200 Subject: [PATCH 01/80] Failing case corrected and test added --- src/Functions/FunctionsJSON.cpp | 28 ++++++++++++++----- ...xtract_low_cardinality_from_json.reference | 2 ++ ...2473_extract_low_cardinality_from_json.sql | 3 ++ 3 files changed, 26 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02473_extract_low_cardinality_from_json.reference create mode 100644 tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index 2f0a0889847..9e44de2cb52 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -1303,13 +1303,27 @@ public: static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) { - ColumnString & col_str = assert_cast(dest); - auto & chars = col_str.getChars(); - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - chars.push_back(0); - col_str.getOffsets().push_back(chars.size()); + if (dest.getDataType() == TypeIndex::LowCardinality) + { + ColumnString::Chars chars; + WriteBufferFromVector buf(chars, AppendModeTag()); + chars.push_back(0); + traverse(element, buf); + buf.finalize(); + std::string str = reinterpret_cast(chars.data()); + chars.push_back(0); + assert_cast(dest).insertData(str.data(), str.size()); + } + else + { + ColumnString & col_str = assert_cast(dest); + auto & chars = col_str.getChars(); + WriteBufferFromVector buf(chars, AppendModeTag()); + traverse(element, buf); + buf.finalize(); + chars.push_back(0); + col_str.getOffsets().push_back(chars.size()); + } return true; } diff --git a/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.reference b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.reference new file mode 100644 index 00000000000..73eab066599 --- /dev/null +++ b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.reference @@ -0,0 +1,2 @@ +('{"b":{"c":1,"d":"str"}}','','','') +('{"b":{"c":1,"d":"str"}}','','','') diff --git a/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql new file mode 100644 index 00000000000..c9810c77720 --- /dev/null +++ b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql @@ -0,0 +1,3 @@ +-- Tags: no-fasttest +SELECT JSONExtract('{"a" : {"b" : {"c" : 1, "d" : "str"}}}', 'Tuple( a LowCardinality(String), b LowCardinality(String), c LowCardinality(String), d LowCardinality(String))'); +SELECT JSONExtract('{"a" : {"b" : {"c" : 1, "d" : "str"}}}', 'Tuple( a String, b LowCardinality(String), c LowCardinality(String), d LowCardinality(String))'); \ No newline at end of file From 241049763bedfeeef29e62c7c5b8698dd7fc4a7f Mon Sep 17 00:00:00 2001 From: Alfonso Martinez Date: Wed, 26 Oct 2022 11:50:45 +0200 Subject: [PATCH 02/80] Fixed failing tests --- src/Functions/FunctionsJSON.cpp | 17 ++++++++++++++--- .../02473_extract_low_cardinality_from_json.sql | 1 - ...2474_extract_fixedstring_from_json.reference | 10 ++++++++++ .../02474_extract_fixedstring_from_json.sql | 8 ++++++++ 4 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference create mode 100644 tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql diff --git 
a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index 9e44de2cb52..d0fd6e2397c 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -1330,12 +1330,23 @@ public: // We use insertResultToFixedStringColumn in case we are inserting raw data in a FixedString column static bool insertResultToFixedStringColumn(IColumn & dest, const Element & element, std::string_view) { - ColumnFixedString & col_str = assert_cast(dest); - auto & chars = col_str.getChars(); + ColumnFixedString::Chars chars; WriteBufferFromVector buf(chars, AppendModeTag()); traverse(element, buf); buf.finalize(); - col_str.insertDefault(); + + auto & col_str = assert_cast(dest); + + if (chars.size() > col_str.getN()) + return false; + + chars.push_back(0); + std::string str = reinterpret_cast(chars.data()); + + auto padded_str = str + std::string(col_str.getN() - std::min(col_str.getN(), str.length()), '\0'); + col_str.insertData(str.data(), str.size()); + + return true; } diff --git a/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql index c9810c77720..0a0ae398227 100644 --- a/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql +++ b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql @@ -1,3 +1,2 @@ --- Tags: no-fasttest SELECT JSONExtract('{"a" : {"b" : {"c" : 1, "d" : "str"}}}', 'Tuple( a LowCardinality(String), b LowCardinality(String), c LowCardinality(String), d LowCardinality(String))'); SELECT JSONExtract('{"a" : {"b" : {"c" : 1, "d" : "str"}}}', 'Tuple( a String, b LowCardinality(String), c LowCardinality(String), d LowCardinality(String))'); \ No newline at end of file diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference new file mode 100644 index 00000000000..783d12fcf1a --- /dev/null +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference @@ -0,0 +1,10 @@ +\0\0\0\0\0\0\0\0\0\0\0 +{"a":123456} +\0\0\0\0\0 +123456 +\0\0\0\0\0 +123456 +\0\0\0\0\0 +\0\0\0\0\0 +131231 +131231 diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql new file mode 100644 index 00000000000..cfc47e00cba --- /dev/null +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql @@ -0,0 +1,8 @@ +SELECT JSONExtract('{"a": 123456}', 'FixedString(11)'); +SELECT JSONExtract('{"a": 123456}', 'FixedString(12)'); +SELECT JSONExtract('{"a": "123456"}', 'a', 'FixedString(5)'); +SELECT JSONExtract('{"a": "123456"}', 'a', 'FixedString(6)'); +SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(5)'); +SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(6)'); +SELECT JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(5))') FROM numbers(2); +SELECT JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(6))') FROM numbers(2); From c37b1542545ab4a3362afdd266a531a7eeca00ff Mon Sep 17 00:00:00 2001 From: Alfonso Martinez Date: Fri, 28 Oct 2022 12:37:59 +0200 Subject: [PATCH 03/80] Added reverted files and fixes for failing fuzzer tests --- src/Functions/FunctionsJSON.cpp | 106 +++++++++++++----- .../performance/low_cardinality_from_json.xml | 73 ++++++++++++ .../02452_check_low_cardinality.reference | 7 ++ .../02452_check_low_cardinality.sql | 54 +++++++++ ...ct_fixed_string_from_nested_json.reference | 1 + 
..._extract_fixed_string_from_nested_json.sql | 6 + 6 files changed, 219 insertions(+), 28 deletions(-) create mode 100644 tests/performance/low_cardinality_from_json.xml create mode 100644 tests/queries/0_stateless/02452_check_low_cardinality.reference create mode 100644 tests/queries/0_stateless/02452_check_low_cardinality.sql create mode 100644 tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.reference create mode 100644 tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.sql diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index ea2d2cc94af..64c5ff3c2c8 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -20,18 +20,19 @@ #include #include -#include -#include -#include -#include -#include -#include +#include #include #include +#include +#include #include #include -#include +#include #include +#include +#include +#include +#include #include #include @@ -695,8 +696,16 @@ public: else return false; - auto & col_vec = assert_cast &>(dest); - col_vec.insertValue(value); + if (dest.getDataType() == TypeIndex::LowCardinality) + { + ColumnLowCardinality & col_low = assert_cast(dest); + col_low.insertData(reinterpret_cast(&value), sizeof(value)); + } + else + { + auto & col_vec = assert_cast &>(dest); + col_vec.insertValue(value); + } return true; } }; @@ -773,8 +782,17 @@ public: return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); auto str = element.getString(); - ColumnString & col_str = assert_cast(dest); - col_str.insertData(str.data(), str.size()); + + if (dest.getDataType() == TypeIndex::LowCardinality) + { + ColumnLowCardinality & col_low = assert_cast(dest); + col_low.insertData(str.data(), str.size()); + } + else + { + ColumnString & col_str = assert_cast(dest); + col_str.insertData(str.data(), str.size()); + } return true; } }; @@ -803,25 +821,33 @@ struct JSONExtractTree } }; - class LowCardinalityNode : public Node + class LowCardinalityFixedStringNode : public Node { public: - LowCardinalityNode(DataTypePtr dictionary_type_, std::unique_ptr impl_) - : dictionary_type(dictionary_type_), impl(std::move(impl_)) {} + explicit LowCardinalityFixedStringNode(const size_t fixed_length_) : fixed_length(fixed_length_) { } bool insertResultToColumn(IColumn & dest, const Element & element) override { - auto from_col = dictionary_type->createColumn(); - if (impl->insertResultToColumn(*from_col, element)) - { - std::string_view value = from_col->getDataAt(0).toView(); - assert_cast(dest).insertData(value.data(), value.size()); - return true; - } - return false; + // If element is an object we delegate the insertion to JSONExtractRawImpl + if (element.isObject()) + return JSONExtractRawImpl::insertResultToLowCardinalityFixedStringColumn(dest, element, fixed_length); + else if (!element.isString()) + return false; + + auto str = element.getString(); + if (str.size() > fixed_length) + return false; + + // For the non low cardinality case of FixedString, the padding is done in the FixedString Column implementation. 
+ // In order to avoid having to pass the data to a FixedString Column and read it back (which would slow down the execution) + // the data is padded here and written directly to the Low Cardinality Column + auto padded_str = str.data() + std::string(fixed_length - std::min(fixed_length, str.length()), '\0'); + + assert_cast(dest).insertData(padded_str.data(), padded_str.size()); + return true; } + private: - DataTypePtr dictionary_type; - std::unique_ptr impl; + const size_t fixed_length; }; class UUIDNode : public Node @@ -833,7 +859,15 @@ struct JSONExtractTree return false; auto uuid = parseFromString(element.getString()); - assert_cast(dest).insert(uuid); + if (dest.getDataType() == TypeIndex::LowCardinality) + { + ColumnLowCardinality & col_low = assert_cast(dest); + col_low.insertData(reinterpret_cast(&uuid), sizeof(uuid)); + } + else + { + assert_cast(dest).insert(uuid); + } return true; } }; @@ -853,6 +887,7 @@ struct JSONExtractTree assert_cast &>(dest).insert(result); return true; } + private: DataTypePtr data_type; }; @@ -871,13 +906,18 @@ struct JSONExtractTree public: bool insertResultToColumn(IColumn & dest, const Element & element) override { - if (!element.isString()) + if (element.isNull()) return false; - auto & col_str = assert_cast(dest); + + if (!element.isString()) + return JSONExtractRawImpl::insertResultToFixedStringColumn(dest, element, {}); + auto str = element.getString(); + auto & col_str = assert_cast(dest); if (str.size() > col_str.getN()) return false; col_str.insertData(str.data(), str.size()); + return true; } }; @@ -1099,9 +1139,19 @@ struct JSONExtractTree case TypeIndex::UUID: return std::make_unique(); case TypeIndex::LowCardinality: { + // The low cardinality case is treated in two different ways: + // For FixedString type, an especial class is implemented for inserting the data in the destination column, + // as the string length must be passed in order to check and pad the incoming data. + // For the rest of low cardinality types, the insertion is done in their corresponding class, adapting the data + // as needed for the insertData function of the ColumnLowCardinality. 
auto dictionary_type = typeid_cast(type.get())->getDictionaryType(); + if ((*dictionary_type).getTypeId() == TypeIndex::FixedString) + { + auto fixed_length = typeid_cast(dictionary_type.get())->getN(); + return std::make_unique(fixed_length); + } auto impl = build(function_name, dictionary_type); - return std::make_unique(dictionary_type, std::move(impl)); + return impl; } case TypeIndex::Decimal256: return std::make_unique>(type); case TypeIndex::Decimal128: return std::make_unique>(type); diff --git a/tests/performance/low_cardinality_from_json.xml b/tests/performance/low_cardinality_from_json.xml new file mode 100644 index 00000000000..ac6542ac503 --- /dev/null +++ b/tests/performance/low_cardinality_from_json.xml @@ -0,0 +1,73 @@ + + + + + string_json + + '{"a": "hi", "b": "hello", "c": "hola", "d": "see you, bye, bye"}' + + + + int_json + + '{"a": 11, "b": 2222, "c": 33333333, "d": 4444444444444444}' + + + + uuid_json + + '{"a": "2d49dc6e-ddce-4cd0-afb8-790956df54c4", "b": "2d49dc6e-ddce-4cd0-afb8-790956df54c3", "c": "2d49dc6e-ddce-4cd0-afb8-790956df54c1", "d": "2d49dc6e-ddce-4cd0-afb8-790956df54c1"}' + + + + low_cardinality_tuple_string + + 'Tuple(a LowCardinality(String), b LowCardinality(String), c LowCardinality(String), d LowCardinality(String) )' + + + + low_cardinality_tuple_fixed_string + + 'Tuple(a LowCardinality(FixedString(20)), b LowCardinality(FixedString(20)), c LowCardinality(FixedString(20)), d LowCardinality(FixedString(20)) )' + + + + low_cardinality_tuple_int8 + + 'Tuple(a LowCardinality(Int8), b LowCardinality(Int8), c LowCardinality(Int8), d LowCardinality(Int8) )' + + + + low_cardinality_tuple_int16 + + 'Tuple(a LowCardinality(Int16), b LowCardinality(Int16), c LowCardinality(Int16), d LowCardinality(Int16) )' + + + + low_cardinality_tuple_int32 + + 'Tuple(a LowCardinality(Int32), b LowCardinality(Int32), c LowCardinality(Int32), d LowCardinality(Int32) )' + + + + low_cardinality_tuple_int64 + + 'Tuple(a LowCardinality(Int64), b LowCardinality(Int64), c LowCardinality(Int64), d LowCardinality(Int64) )' + + + + low_cardinality_tuple_uuid + + 'Tuple(a LowCardinality(UUID), b LowCardinality(UUID), c LowCardinality(UUID), d LowCardinality(UUID) )' + + + + + SELECT 'fixed_string_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({string_json}), {low_cardinality_tuple_fixed_string})) FORMAT Null + SELECT 'string_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({string_json}), {low_cardinality_tuple_string})) FORMAT Null + SELECT 'int8_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({int_json}), {low_cardinality_tuple_int8})) FORMAT Null + SELECT 'int16_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({int_json}), {low_cardinality_tuple_int16})) FORMAT Null + SELECT 'int32_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({int_json}), {low_cardinality_tuple_int32})) FORMAT Null + SELECT 'int64_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({int_json}), {low_cardinality_tuple_int64})) FORMAT Null + SELECT 'uuid_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({uuid_json}), {low_cardinality_tuple_uuid})) FORMAT Null + \ No newline at end of file diff --git a/tests/queries/0_stateless/02452_check_low_cardinality.reference b/tests/queries/0_stateless/02452_check_low_cardinality.reference new file mode 100644 index 00000000000..700778e02c7 --- /dev/null +++ b/tests/queries/0_stateless/02452_check_low_cardinality.reference @@ -0,0 +1,7 @@ 
+('hi','hello','hola','see you, bye, bye') +('hi\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','hello\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','hola\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','see you, bye, bye\0\0\0') +(11,0,0,0) +(11,2222,0,0) +(11,2222,33333333,0) +(11,2222,33333333,4444444444444444) +('2d49dc6e-ddce-4cd0-afb8-790956df54c4','2d49dc6e-ddce-4cd0-afb8-790956df54c3','2d49dc6e-ddce-4cd0-afb8-790956df54c1','2d49dc6e-ddce-4cd0-afb8-790956df54c1') diff --git a/tests/queries/0_stateless/02452_check_low_cardinality.sql b/tests/queries/0_stateless/02452_check_low_cardinality.sql new file mode 100644 index 00000000000..166be281405 --- /dev/null +++ b/tests/queries/0_stateless/02452_check_low_cardinality.sql @@ -0,0 +1,54 @@ +DROP TABLE IF EXISTS test_low_cardinality_string; +DROP TABLE IF EXISTS test_low_cardinality_uuid; +DROP TABLE IF EXISTS test_low_cardinality_int; +CREATE TABLE test_low_cardinality_string (data String) ENGINE MergeTree ORDER BY data; +CREATE TABLE test_low_cardinality_uuid (data String) ENGINE MergeTree ORDER BY data; +CREATE TABLE test_low_cardinality_int (data String) ENGINE MergeTree ORDER BY data; +INSERT INTO test_low_cardinality_string (data) VALUES ('{"a": "hi", "b": "hello", "c": "hola", "d": "see you, bye, bye"}'); +INSERT INTO test_low_cardinality_int (data) VALUES ('{"a": 11, "b": 2222, "c": 33333333, "d": 4444444444444444}'); +INSERT INTO test_low_cardinality_uuid (data) VALUES ('{"a": "2d49dc6e-ddce-4cd0-afb8-790956df54c4", "b": "2d49dc6e-ddce-4cd0-afb8-790956df54c3", "c": "2d49dc6e-ddce-4cd0-afb8-790956df54c1", "d": "2d49dc6e-ddce-4cd0-afb8-790956df54c1"}'); +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(String), + b LowCardinality(String), + c LowCardinality(String), + d LowCardinality(String) + )') AS json FROM test_low_cardinality_string; +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(FixedString(20)), + b LowCardinality(FixedString(20)), + c LowCardinality(FixedString(20)), + d LowCardinality(FixedString(20)) + )') AS json FROM test_low_cardinality_string; +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(Int8), + b LowCardinality(Int8), + c LowCardinality(Int8), + d LowCardinality(Int8) + )') AS json FROM test_low_cardinality_int; +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(Int16), + b LowCardinality(Int16), + c LowCardinality(Int16), + d LowCardinality(Int16) + )') AS json FROM test_low_cardinality_int; +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(Int32), + b LowCardinality(Int32), + c LowCardinality(Int32), + d LowCardinality(Int32) + )') AS json FROM test_low_cardinality_int; +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(Int64), + b LowCardinality(Int64), + c LowCardinality(Int64), + d LowCardinality(Int64) + )') AS json FROM test_low_cardinality_int; +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(UUID), + b LowCardinality(UUID), + c LowCardinality(UUID), + d LowCardinality(UUID) + )') AS json FROM test_low_cardinality_uuid; +DROP TABLE test_low_cardinality_string; +DROP TABLE test_low_cardinality_uuid; +DROP TABLE test_low_cardinality_int; diff --git a/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.reference b/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.reference new file mode 100644 index 00000000000..3a528a24821 --- /dev/null +++ b/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.reference @@ -0,0 +1 @@ +('{"b":{"c":1,"d":"str"}}\0') diff --git a/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.sql 
b/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.sql new file mode 100644 index 00000000000..449713d396f --- /dev/null +++ b/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.sql @@ -0,0 +1,6 @@ +-- Tags: no-fasttest +DROP TABLE IF EXISTS test_fixed_string_nested_json; +CREATE TABLE test_fixed_string_nested_json (data String) ENGINE MergeTree ORDER BY data; +INSERT INTO test_fixed_string_nested_json (data) VALUES ('{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); +SELECT JSONExtract(data, 'Tuple(a FixedString(24))') AS json FROM test_fixed_string_nested_json; +DROP TABLE test_fixed_string_nested_json; \ No newline at end of file From 9f61ed4d50f911d31081f86af6bfcc0db2af8e3b Mon Sep 17 00:00:00 2001 From: Alfonso Martinez Date: Mon, 31 Oct 2022 13:23:27 +0100 Subject: [PATCH 04/80] Added no-fasttest tag for JSON tests --- tests/queries/0_stateless/02452_check_low_cardinality.sql | 1 + .../queries/0_stateless/02474_extract_fixedstring_from_json.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02452_check_low_cardinality.sql b/tests/queries/0_stateless/02452_check_low_cardinality.sql index 166be281405..e9cb8c800c7 100644 --- a/tests/queries/0_stateless/02452_check_low_cardinality.sql +++ b/tests/queries/0_stateless/02452_check_low_cardinality.sql @@ -1,3 +1,4 @@ +-- Tags: no-fasttest DROP TABLE IF EXISTS test_low_cardinality_string; DROP TABLE IF EXISTS test_low_cardinality_uuid; DROP TABLE IF EXISTS test_low_cardinality_int; diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql index cfc47e00cba..336dda411da 100644 --- a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql @@ -1,3 +1,4 @@ +-- Tags: no-fasttest SELECT JSONExtract('{"a": 123456}', 'FixedString(11)'); SELECT JSONExtract('{"a": 123456}', 'FixedString(12)'); SELECT JSONExtract('{"a": "123456"}', 'a', 'FixedString(5)'); From c8f9af1afa52a0d75500b70dfb7508b42e55a889 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 2 Nov 2022 15:47:11 -0400 Subject: [PATCH 05/80] start renaming --- .../operations/system-tables/dictionaries.md | 4 +- .../external-dictionaries/_category_.yml | 4 +- .../external-dicts-dict-sources.md | 6 +- .../external-dicts-dict-structure.md | 2 +- .../external-dicts-dict.md | 4 +- .../external-dictionaries/external-dicts.md | 27 +++++--- docs/en/sql-reference/dictionaries/index.md | 4 +- .../dictionaries/internal-dicts.md | 4 +- .../statements/create/dictionary.md | 65 +++++++++++++++---- 9 files changed, 84 insertions(+), 36 deletions(-) diff --git a/docs/en/operations/system-tables/dictionaries.md b/docs/en/operations/system-tables/dictionaries.md index 112e2cc2cdf..4b256f0de97 100644 --- a/docs/en/operations/system-tables/dictionaries.md +++ b/docs/en/operations/system-tables/dictionaries.md @@ -3,7 +3,7 @@ slug: /en/operations/system-tables/dictionaries --- # dictionaries -Contains information about [external dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). +Contains information about [dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). 
Columns: @@ -33,7 +33,7 @@ Columns: - `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. - `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. - `loading_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary. -- `last_successful_update_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with external sources and investigate causes. +- `last_successful_update_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with dictionary sources and investigate the causes. - `loading_duration` ([Float32](../../sql-reference/data-types/float.md)) — Duration of a dictionary loading. - `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn’t be created. - `comment` ([String](../../sql-reference/data-types/string.md)) — Text of the comment to dictionary. diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml b/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml index 1f98223c54c..af79ff9af23 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml @@ -1,8 +1,8 @@ position: 37 -label: 'External Dictionaries' +label: 'Dictionaries' collapsible: true collapsed: true link: type: generated-index - title: External Dictionaries + title: Dictionaries slug: /en/sql-reference/dictionaries/external-dictionaries diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index d457f327e7a..b13295a84e7 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources sidebar_position: 43 -sidebar_label: Sources of External Dictionaries +sidebar_label: Dictionary Sources --- -# Sources of External Dictionaries +# Dictionary Sources -An external dictionary can be connected to ClickHouse from many different sources. +A dictionary can be connected to ClickHouse from many different sources. 
If the dictionary is configured using an xml-file, the configuration looks like this: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 895743c3b50..40664e0ced3 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -171,5 +171,5 @@ Configuration fields: **See Also** -- [Functions for working with external dictionaries](../../../sql-reference/functions/ext-dict-functions.md). +- [Functions for working with dictionaries](../../../sql-reference/functions/ext-dict-functions.md). diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md index 5c237eea8c7..90e447385f8 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict sidebar_position: 40 -sidebar_label: Configuring an External Dictionary +sidebar_label: Configuring a Dictionary --- -# Configuring an External Dictionary +# Configuring a Dictionary If dictionary is configured using xml file, than dictionary configuration has the following structure: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 095fb6360cd..1d97a12f452 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -4,17 +4,17 @@ sidebar_position: 39 sidebar_label: General Description --- -# External Dictionaries +# Dictionaries -You can add your own dictionaries from various data sources. The data source for a dictionary can be a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Sources for external dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”. +You can add your own dictionaries from various data sources. The source for a dictionary can be a ClickHouse table, a DDL query, a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”. ClickHouse: - Fully or partially stores dictionaries in RAM. - Periodically updates dictionaries and dynamically loads missing values. In other words, dictionaries can be loaded dynamically. -- Allows to create external dictionaries with xml files or [DDL queries](../../../sql-reference/statements/create/dictionary.md). +- Allows creating dictionaries with xml files or [DDL queries](../../../sql-reference/statements/create/dictionary.md). -The configuration of external dictionaries can be located in one or more xml-files. The path to the configuration is specified in the [dictionaries_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config) parameter. +The configuration of dictionaries can be located in one or more xml-files. 
The path to the configuration is specified in the [dictionaries_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config) parameter. Dictionaries can be loaded at server startup or at first use, depending on the [dictionaries_lazy_load](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load) setting. @@ -24,6 +24,16 @@ The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tabl - Configuration parameters. - Metrics like amount of RAM allocated for the dictionary or a number of queries since the dictionary was successfully loaded. +## Creating a dictionary with a DDL query + +Dictionaries can be created with [DDL queries](../../../sql-reference/statements/create/dictionary.md). This does not require any additional records in a server configuration file. This allows dictionaries to be worked with as first-class entities, like tables or views. + +## Creating a dictionary with a configuration file + +:::note +Creating a dictionary with a configuration file is not applicable to ClickHouse Cloud. Please use DDL (see above). +::: + The dictionary configuration file has the following format: ``` xml @@ -44,18 +54,17 @@ The dictionary configuration file has the following format: You can [configure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md) any number of dictionaries in the same file. -[DDL queries for dictionaries](../../../sql-reference/statements/create/dictionary.md) does not require any additional records in server configuration. They allow to work with dictionaries as first-class entities, like tables or views. :::note -You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../../sql-reference/functions/other-functions.md) function). This functionality is not related to external dictionaries. +You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../../sql-reference/functions/other-functions.md) function). This functionality is not related to dictionaries. 
::: ## See Also -- [Configuring an External Dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md) +- [Configuring a Dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md) - [Storing Dictionaries in Memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) - [Dictionary Updates](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) -- [Sources of External Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) +- [Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) - [Dictionary Key and Fields](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md) -- [Functions for Working with External Dictionaries](../../../sql-reference/functions/ext-dict-functions.md) +- [Functions for Working with Dictionaries](../../../sql-reference/functions/ext-dict-functions.md) diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index eccd1215e30..b6aa62bdb47 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -12,6 +12,6 @@ ClickHouse supports special functions for working with dictionaries that can be ClickHouse supports: -- [Built-in dictionaries](../../sql-reference/dictionaries/internal-dicts.md#internal_dicts) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md). -- [Plug-in (external) dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md#dicts-external-dicts) with a [set of functions](../../sql-reference/functions/ext-dict-functions.md). +- [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md#dicts-external-dicts) with a [set of functions](../../sql-reference/functions/ext-dict-functions.md). +- [Embedded dictionaries](../../sql-reference/dictionaries/internal-dicts.md#internal_dicts) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md). diff --git a/docs/en/sql-reference/dictionaries/internal-dicts.md b/docs/en/sql-reference/dictionaries/internal-dicts.md index dbc12a576f7..252bac5f5e8 100644 --- a/docs/en/sql-reference/dictionaries/internal-dicts.md +++ b/docs/en/sql-reference/dictionaries/internal-dicts.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/dictionaries/internal-dicts sidebar_position: 39 -sidebar_label: Internal Dictionaries +sidebar_label: Embedded Dictionaries --- -# Internal Dictionaries +# Embedded Dictionaries ClickHouse contains a built-in feature for working with a geobase. 
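The renamed "Embedded Dictionaries" page above only touches on the built-in geobase feature. A minimal usage sketch, assuming the regions hierarchy and names files are configured on the server (they are not shipped by default) and using an arbitrary region id:

```sql
-- Sketch only: requires the embedded geobase to be configured via
-- path_to_regions_hierarchy_file / path_to_regions_names_files in the server config.
-- The id 213 is an arbitrary example key from a user-supplied geobase.
SELECT
    regionToName(toUInt32(213)) AS region_name,
    regionToCountry(toUInt32(213)) AS country_id;
```

`regionToName` and `regionToCountry` belong to the dedicated set of functions for embedded dictionaries linked from `ym-dict-functions.md` above.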
diff --git a/docs/en/sql-reference/statements/create/dictionary.md b/docs/en/sql-reference/statements/create/dictionary.md index b24ff480c2d..cb07f627018 100644 --- a/docs/en/sql-reference/statements/create/dictionary.md +++ b/docs/en/sql-reference/statements/create/dictionary.md @@ -5,9 +5,9 @@ sidebar_label: DICTIONARY title: "CREATE DICTIONARY" --- -Creates a new [external dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with given [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). +Creates a new [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with given [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). -**Syntax** +## Syntax ``` sql CREATE [OR REPLACE] DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] @@ -25,17 +25,25 @@ SETTINGS(setting_name = setting_value, setting_name = setting_value, ...) COMMENT 'Comment' ``` -External dictionary structure consists of attributes. Dictionary attributes are specified similarly to table columns. The only required attribute property is its type, all other properties may have default values. +The dictionary structure consists of attributes. Dictionary attributes are specified similarly to table columns. The only required attribute property is its type, all other properties may have default values. `ON CLUSTER` clause allows creating dictionary on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md). Depending on dictionary [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) one or more attributes can be specified as dictionary keys. -For more information, see [External Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section. +### SOURCE + +The source for a dictionary can be a: +- table in the current ClickHouse service +- table in a remote ClickHouse service +- file available by HTTP(S) +- another database + You can add a comment to the dictionary when you creating it using `COMMENT` clause. 
-**Example** +#### Create a dictionary from a table in the current ClickHouse service + Input table `source_table`: @@ -49,27 +57,26 @@ Input table `source_table`: Creating the dictionary: ``` sql -CREATE DICTIONARY dictionary_with_comment +CREATE DICTIONARY id_value_dictionary ( id UInt64, value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'source_table')) +SOURCE(CLICKHOUSE(TABLE 'source_table')) LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000) -COMMENT 'The temporary dictionary'; ``` Output the dictionary: ``` sql -SHOW CREATE DICTIONARY dictionary_with_comment; +SHOW CREATE DICTIONARY id_value_dictionary; ``` ```text ┌─statement───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ CREATE DICTIONARY default.dictionary_with_comment +│ CREATE DICTIONARY default.id_value_dictionary ( `id` UInt64, `value` String @@ -77,10 +84,41 @@ SHOW CREATE DICTIONARY dictionary_with_comment; PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'source_table')) LIFETIME(MIN 0 MAX 1000) -LAYOUT(FLAT()) -COMMENT 'The temporary dictionary' │ +LAYOUT(FLAT()) | └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` +#### Create a dictionary from a table in a remote ClickHouse service +Creating the dictionary: + +``` sql +CREATE DICTIONARY id_value_dictionary +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(HOST 'HOSTNAME' PORT '8443' TABLE 'source_table')) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000) +``` +#### Create a dictionary from a file available by HTTP(S) +```sql +statement: CREATE DICTIONARY default.taxi_zone_dictionary +( + `LocationID` UInt16 DEFAULT 0, + `Borough` String, + `Zone` String, + `service_zone` String +) +PRIMARY KEY LocationID +SOURCE(HTTP(URL 'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/taxi_zone_lookup.csv' FORMAT 'CSVWithNames')) +LIFETIME(MIN 0 MAX 0) +LAYOUT(HASHED()) +``` + +#### Create a dictionary from another database + + Output the comment to dictionary: @@ -96,4 +134,5 @@ SELECT comment FROM system.dictionaries WHERE name == 'dictionary_with_comment' **See Also** -- [system.dictionaries](../../../operations/system-tables/dictionaries.md) — This table contains information about [external dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). +- For more information, see the [Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section. +- [system.dictionaries](../../../operations/system-tables/dictionaries.md) — This table contains information about [dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). 
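The `CREATE DICTIONARY` examples above stop at defining `id_value_dictionary`. A short, hedged usage sketch against the `source_table` data shown in the same example (nothing beyond that sample is assumed):

```sql
-- Illustrative lookups against id_value_dictionary from the documentation example above.
-- dictGet takes the dictionary name, the attribute to read, and the key expression.
SELECT
    dictGet('id_value_dictionary', 'value', toUInt64(1)) AS value_for_key_1,
    dictGetOrDefault('id_value_dictionary', 'value', toUInt64(42), 'unknown') AS absent_key;
```

With the sample rows `(1, 'First')` and `(2, 'Second')`, the first column should return `First`, while the second falls back to `unknown` because key `42` is not in the source table.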
From 3dd5df46159057dfbaf36bd9b29ebc22e05b2680 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 2 Nov 2022 16:58:07 -0400 Subject: [PATCH 06/80] update examples --- .../external-dicts-dict-sources.md | 19 ++++--- .../statements/create/dictionary.md | 53 +++++++++---------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index b13295a84e7..8218c066530 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -6,6 +6,11 @@ sidebar_label: Dictionary Sources # Dictionary Sources +:::tip +If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries. +Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). +::: + A dictionary can be connected to ClickHouse from many different sources. If the dictionary is configured using an xml-file, the configuration looks like this: @@ -65,13 +70,13 @@ Types of sources (`source_type`): - [Executable Pool](#dicts-external_dicts_dict_sources-executable_pool) - [HTTP(s)](#dicts-external_dicts_dict_sources-http) - DBMS - - [ODBC](#dicts-external_dicts_dict_sources-odbc) - - [MySQL](#dicts-external_dicts_dict_sources-mysql) - - [ClickHouse](#dicts-external_dicts_dict_sources-clickhouse) - - [MongoDB](#dicts-external_dicts_dict_sources-mongodb) - - [Redis](#dicts-external_dicts_dict_sources-redis) - - [Cassandra](#dicts-external_dicts_dict_sources-cassandra) - - [PostgreSQL](#dicts-external_dicts_dict_sources-postgresql) + - [ODBC](#odbc) + - [MySQL](#mysql) + - [ClickHouse](#clickhouse) + - [MongoDB](#mongodb) + - [Redis](#redis) + - [Cassandra](#cassandra) + - [PostgreSQL](#postgresql) ## Local File diff --git a/docs/en/sql-reference/statements/create/dictionary.md b/docs/en/sql-reference/statements/create/dictionary.md index cb07f627018..37051f8031a 100644 --- a/docs/en/sql-reference/statements/create/dictionary.md +++ b/docs/en/sql-reference/statements/create/dictionary.md @@ -31,7 +31,7 @@ The dictionary structure consists of attributes. Dictionary attributes are speci Depending on dictionary [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) one or more attributes can be specified as dictionary keys. -### SOURCE +## SOURCE The source for a dictionary can be a: - table in the current ClickHouse service @@ -39,11 +39,7 @@ The source for a dictionary can be a: - file available by HTTP(S) - another database - -You can add a comment to the dictionary when you creating it using `COMMENT` clause. 
- -#### Create a dictionary from a table in the current ClickHouse service - +### Create a dictionary from a table in the current ClickHouse service Input table `source_table`: @@ -74,20 +70,29 @@ Output the dictionary: SHOW CREATE DICTIONARY id_value_dictionary; ``` -```text -┌─statement───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ CREATE DICTIONARY default.id_value_dictionary +```response +CREATE DICTIONARY default.id_value_dictionary ( `id` UInt64, `value` String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'source_table')) +SOURCE(CLICKHOUSE(TABLE 'source_table')) LIFETIME(MIN 0 MAX 1000) -LAYOUT(FLAT()) | -└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +LAYOUT(FLAT()) ``` -#### Create a dictionary from a table in a remote ClickHouse service + +### Create a dictionary from a table in a remote ClickHouse service + +Input table (in the remote ClickHouse service) `source_table`: + +``` text +┌─id─┬─value──┐ +│ 1 │ First │ +│ 2 │ Second │ +└────┴────────┘ +``` + Creating the dictionary: ``` sql @@ -97,11 +102,13 @@ CREATE DICTIONARY id_value_dictionary value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'HOSTNAME' PORT '8443' TABLE 'source_table')) +SOURCE(CLICKHOUSE(HOST 'HOSTNAME' PORT 9000 USER 'default' PASSWORD 'PASSWORD' TABLE 'source_table' DB 'default')) LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000) ``` -#### Create a dictionary from a file available by HTTP(S) + +### Create a dictionary from a file available by HTTP(S) + ```sql statement: CREATE DICTIONARY default.taxi_zone_dictionary ( @@ -116,21 +123,9 @@ LIFETIME(MIN 0 MAX 0) LAYOUT(HASHED()) ``` -#### Create a dictionary from another database +### Create a dictionary from another database - - -Output the comment to dictionary: - -``` sql -SELECT comment FROM system.dictionaries WHERE name == 'dictionary_with_comment' AND database == currentDatabase(); -``` - -```text -┌─comment──────────────────┐ -│ The temporary dictionary │ -└──────────────────────────┘ -``` +Please see the details in [Dictionary sources](/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md/#dbms). 
**See Also** From df4868587b4002b33fa22c053438865fe8bac775 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 2 Nov 2022 17:40:55 -0400 Subject: [PATCH 07/80] add note to use DDL --- .../external-dictionaries/external-dicts-dict-layout.md | 5 +++++ .../external-dictionaries/external-dicts-dict-lifetime.md | 5 +++++ .../external-dictionaries/external-dicts-dict-polygon.md | 5 +++++ .../external-dictionaries/external-dicts-dict-structure.md | 5 +++++ .../external-dictionaries/external-dicts-dict.md | 5 +++++ .../dictionaries/external-dictionaries/external-dicts.md | 5 +++++ 6 files changed, 30 insertions(+) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 02a4ad57a3b..19884e7676d 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -24,6 +24,11 @@ ClickHouse generates an exception for errors with dictionaries. Examples of erro You can view the list of external dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table. +:::tip +If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries. +Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). +::: + The configuration looks like this: ``` xml diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index 6e4c8c4b94e..f0d6cb64a0b 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -12,6 +12,11 @@ Dictionary updates (other than loading for first use) do not block queries. Duri Example of settings: +:::tip +If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries. +Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). +::: + ``` xml ... diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md index e5ee48c9166..2124f816883 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -10,6 +10,11 @@ For example: defining a city area by geographical coordinates. Example of a polygon dictionary configuration: +:::tip +If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries. +Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). 
+::: + ``` xml diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 40664e0ced3..20056c6ac9c 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -6,6 +6,11 @@ sidebar_label: Dictionary Key and Fields # Dictionary Key and Fields +:::tip +If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries. +Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). +::: + The `structure` clause describes the dictionary key and fields available for queries. XML description: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md index 90e447385f8..f95a3593ae4 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md @@ -6,6 +6,11 @@ sidebar_label: Configuring a Dictionary # Configuring a Dictionary +:::tip +If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries. +Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). +::: + If dictionary is configured using xml file, than dictionary configuration has the following structure: ``` xml diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 1d97a12f452..19bb7474d40 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -6,6 +6,11 @@ sidebar_label: General Description # Dictionaries +:::tip +If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries. +Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). +::: + You can add your own dictionaries from various data sources. The source for a dictionary can be a ClickHouse table, a DDL query, a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”. 
ClickHouse: From 7271d4d122e01249e6995011c51421285d3ab8d0 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 2 Nov 2022 17:52:35 -0400 Subject: [PATCH 08/80] add note to embedded dict --- docs/en/sql-reference/dictionaries/internal-dicts.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/en/sql-reference/dictionaries/internal-dicts.md b/docs/en/sql-reference/dictionaries/internal-dicts.md index 252bac5f5e8..f26c60880a4 100644 --- a/docs/en/sql-reference/dictionaries/internal-dicts.md +++ b/docs/en/sql-reference/dictionaries/internal-dicts.md @@ -3,9 +3,12 @@ slug: /en/sql-reference/dictionaries/internal-dicts sidebar_position: 39 sidebar_label: Embedded Dictionaries --- +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md'; # Embedded Dictionaries + + ClickHouse contains a built-in feature for working with a geobase. This allows you to: From cf05ac84dc74adf3daa4370625d3082ce6a6ff91 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 4 Nov 2022 11:12:45 +0100 Subject: [PATCH 09/80] Add no-fasttest tag --- .../0_stateless/02473_extract_low_cardinality_from_json.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql index 0a0ae398227..664c52e772f 100644 --- a/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql +++ b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql @@ -1,2 +1,3 @@ +-- Tags: no-fasttest SELECT JSONExtract('{"a" : {"b" : {"c" : 1, "d" : "str"}}}', 'Tuple( a LowCardinality(String), b LowCardinality(String), c LowCardinality(String), d LowCardinality(String))'); -SELECT JSONExtract('{"a" : {"b" : {"c" : 1, "d" : "str"}}}', 'Tuple( a String, b LowCardinality(String), c LowCardinality(String), d LowCardinality(String))'); \ No newline at end of file +SELECT JSONExtract('{"a" : {"b" : {"c" : 1, "d" : "str"}}}', 'Tuple( a String, b LowCardinality(String), c LowCardinality(String), d LowCardinality(String))'); From 34f90ff6efb6a773cbca1d360d5c7469df92eb8e Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 7 Nov 2022 09:26:50 -0500 Subject: [PATCH 10/80] update pages that refer to dictionaries --- .../table-engines/integrations/index.md | 2 +- .../table-engines/integrations/mysql.md | 2 +- .../engines/table-engines/integrations/odbc.md | 2 +- .../table-engines/integrations/postgresql.md | 2 +- .../example-datasets/cell-towers.md | 2 +- docs/en/interfaces/formats.md | 2 +- docs/en/operations/named-collections.md | 6 +++--- .../settings.md | 4 ++-- .../external-dicts-dict-layout.md | 2 +- .../external-dicts-dict-polygon.md | 2 +- .../functions/ext-dict-functions.md | 18 +++++++++--------- .../functions/ym-dict-functions.md | 2 +- .../en/sql-reference/statements/select/join.md | 2 +- docs/en/sql-reference/statements/show.md | 2 +- docs/en/sql-reference/table-functions/mysql.md | 2 +- docs/en/sql-reference/table-functions/odbc.md | 2 +- .../table-functions/postgresql.md | 2 +- 17 files changed, 28 insertions(+), 28 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/index.md b/docs/en/engines/table-engines/integrations/index.md index 7e67bcb6249..09e89209ea9 100644 --- a/docs/en/engines/table-engines/integrations/index.md +++ b/docs/en/engines/table-engines/integrations/index.md @@ -6,7 +6,7 @@ sidebar_label: Integrations # Table Engines for Integrations -ClickHouse provides various means 
for integrating with external systems, including table engines. Like with all other table engines, the configuration is done using `CREATE TABLE` or `ALTER TABLE` queries. Then from a user perspective, the configured integration looks like a normal table, but queries to it are proxied to the external system. This transparent querying is one of the key advantages of this approach over alternative integration methods, like external dictionaries or table functions, which require to use custom query methods on each use. +ClickHouse provides various means for integrating with external systems, including table engines. Like with all other table engines, the configuration is done using `CREATE TABLE` or `ALTER TABLE` queries. Then from a user perspective, the configured integration looks like a normal table, but queries to it are proxied to the external system. This transparent querying is one of the key advantages of this approach over alternative integration methods, like dictionaries or table functions, which require to use custom query methods on each use. List of supported integrations: diff --git a/docs/en/engines/table-engines/integrations/mysql.md b/docs/en/engines/table-engines/integrations/mysql.md index 7c9c4cfea53..9f637c50989 100644 --- a/docs/en/engines/table-engines/integrations/mysql.md +++ b/docs/en/engines/table-engines/integrations/mysql.md @@ -180,6 +180,6 @@ Default value: `300`. ## See Also {#see-also} - [The mysql table function](../../../sql-reference/table-functions/mysql.md) -- [Using MySQL as a source of external dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql) +- [Using MySQL as a dictionary source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql) [Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/mysql/) diff --git a/docs/en/engines/table-engines/integrations/odbc.md b/docs/en/engines/table-engines/integrations/odbc.md index 043d5170654..e21a64bc5b2 100644 --- a/docs/en/engines/table-engines/integrations/odbc.md +++ b/docs/en/engines/table-engines/integrations/odbc.md @@ -126,7 +126,7 @@ SELECT * FROM odbc_t ## See Also {#see-also} -- [ODBC external dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-odbc) +- [ODBC dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-odbc) - [ODBC table function](../../../sql-reference/table-functions/odbc.md) [Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/odbc/) diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index 4bb8033de9c..c07512cf0ce 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -174,6 +174,6 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32) **See Also** - [The `postgresql` table function](../../../sql-reference/table-functions/postgresql.md) -- [Using PostgreSQL as a source of external dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql) +- [Using PostgreSQL as a dictionary 
source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql) [Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/postgresql/) diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md index 3d993c3e224..26bed30e3b8 100644 --- a/docs/en/getting-started/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -129,7 +129,7 @@ SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10 So, the top countries are: the USA, Germany, and Russia. -You may want to create an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values. +You may want to create a [Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values. ## Use case: Incorporate geo data {#use-case} diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 58e986cc2f3..56708def497 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -5,7 +5,7 @@ sidebar_label: Input and Output Formats title: Formats for Input and Output Data --- -ClickHouse can accept and return data in various formats. A format supported for input can be used to parse the data provided to `INSERT`s, to perform `SELECT`s from a file-backed table such as File, URL or HDFS, or to read an external dictionary. A format supported for output can be used to arrange the +ClickHouse can accept and return data in various formats. A format supported for input can be used to parse the data provided to `INSERT`s, to perform `SELECT`s from a file-backed table such as File, URL or HDFS, or to read a dictionary. A format supported for output can be used to arrange the results of a `SELECT`, and to perform `INSERT`s into a file-backed table. The supported formats are: diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index f605045a0ad..cbb8d0a4c02 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -130,7 +130,7 @@ SHOW TABLES FROM mydatabase; └────────┘ ``` -### Example of using named collections with an external dictionary with source MySQL +### Example of using named collections with a dictionary with source MySQL ```sql CREATE DICTIONARY dict (A Int64, B String) @@ -213,7 +213,7 @@ SHOW TABLES FROM mydatabase └──────┘ ``` -### Example of using named collections with an external dictionary with source POSTGRESQL +### Example of using named collections with a dictionary with source POSTGRESQL ```sql CREATE DICTIONARY dict (a Int64, b String) @@ -270,7 +270,7 @@ SELECT * FROM remote(remote1, database = default, table = test); └───┴───┘ ``` -### Example of using named collections with an external dictionary with source ClickHouse +### Example of using named collections with a dictionary with source ClickHouse ```sql CREATE DICTIONARY dict(a Int64, b String) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index dcda7536935..5faf3819d7e 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -268,14 +268,14 @@ The path to the table in ZooKeeper. 
## dictionaries_config {#server_configuration_parameters-dictionaries_config} -The path to the config file for external dictionaries. +The path to the config file for dictionaries. Path: - Specify the absolute path or the path relative to the server config file. - The path can contain wildcards \* and ?. -See also “[External dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md)”. +See also “[Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md)”. **Example** diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 19884e7676d..49a8620b609 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -22,7 +22,7 @@ ClickHouse generates an exception for errors with dictionaries. Examples of erro - The dictionary being accessed could not be loaded. - Error querying a `cached` dictionary. -You can view the list of external dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table. +You can view the list of dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table. :::tip If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries. diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md index 2124f816883..42c4a7faa73 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -83,7 +83,7 @@ To respond to the query, there is a corresponding cell, and the index for the po - `POLYGON`. Synonym to `POLYGON_INDEX_CELL`. -Dictionary queries are carried out using standard [functions](../../../sql-reference/functions/ext-dict-functions.md) for working with external dictionaries. +Dictionary queries are carried out using standard [functions](../../../sql-reference/functions/ext-dict-functions.md) for working with dictionaries. An important difference is that here the keys will be the points for which you want to find the polygon containing them. **Example** diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 728e26d6958..1c33638da09 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -1,20 +1,20 @@ --- slug: /en/sql-reference/functions/ext-dict-functions sidebar_position: 58 -sidebar_label: External Dictionaries +sidebar_label: Dictionaries --- +# Functions for Working with Dictionaries + :::note For dictionaries created with [DDL queries](../../sql-reference/statements/create/dictionary.md), the `dict_name` parameter must be fully specified, like `.`. Otherwise, the current database is used. ::: -# Functions for Working with External Dictionaries - -For information on connecting and configuring external dictionaries, see [External dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). 
+For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). ## dictGet, dictGetOrDefault, dictGetOrNull -Retrieves values from an external dictionary. +Retrieves values from a dictionary. ``` sql dictGet('dict_name', attr_names, id_expr) @@ -52,7 +52,7 @@ Create a text file `ext-dict-test.csv` containing the following: The first column is `id`, the second column is `c1`. -Configure the external dictionary: +Configure the dictionary: ``` xml @@ -112,7 +112,7 @@ Create a text file `ext-dict-mult.csv` containing the following: The first column is `id`, the second is `c1`, the third is `c2`. -Configure the external dictionary: +Configure the dictionary: ``` xml @@ -185,7 +185,7 @@ INSERT INTO range_key_dictionary_source_table VALUES(2, toDate('2019-05-20'), to INSERT INTO range_key_dictionary_source_table VALUES(3, toDate('2019-05-20'), toDate('2019-05-20'), 'Third', 'Third'); ``` -Create the external dictionary: +Create the dictionary: ```sql CREATE DICTIONARY range_key_dictionary @@ -226,7 +226,7 @@ Result: **See Also** -- [External Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) +- [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) ## dictHas diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index 04df3db571e..f92ad5db2ad 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -131,7 +131,7 @@ Type: `UInt32`. ### regionToPopulation(id\[, geobase\]) Gets the population for a region. -The population can be recorded in files with the geobase. See the section “External dictionaries”. +The population can be recorded in files with the geobase. See the section “Dictionaries”. If the population is not recorded for the region, it returns 0. In the geobase, the population might be recorded for child regions, but not for parent regions. diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 1890ff081d8..62d3e9fd69a 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -282,7 +282,7 @@ Each time a query is run with the same `JOIN`, the subquery is run again because In some cases, it is more efficient to use [IN](../../../sql-reference/operators/in.md) instead of `JOIN`. -If you need a `JOIN` for joining with dimension tables (these are relatively small tables that contain dimension properties, such as names for advertising campaigns), a `JOIN` might not be very convenient due to the fact that the right table is re-accessed for every query. For such cases, there is an “external dictionaries” feature that you should use instead of `JOIN`. For more information, see the [External dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section. +If you need a `JOIN` for joining with dimension tables (these are relatively small tables that contain dimension properties, such as names for advertising campaigns), a `JOIN` might not be very convenient due to the fact that the right table is re-accessed for every query. For such cases, there is a “dictionaries” feature that you should use instead of `JOIN`. For more information, see the [Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section. 
### Memory Limitations diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 87248bb115b..0efad3d460f 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -198,7 +198,7 @@ Result: ## SHOW DICTIONARIES -Displays a list of [external dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). +Displays a list of [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). ``` sql SHOW DICTIONARIES [FROM ] [LIKE ''] [LIMIT ] [INTO OUTFILE ] [FORMAT ] diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md index f867cda45bd..de1567c052e 100644 --- a/docs/en/sql-reference/table-functions/mysql.md +++ b/docs/en/sql-reference/table-functions/mysql.md @@ -110,5 +110,5 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123'); **See Also** - [The ‘MySQL’ table engine](../../engines/table-engines/integrations/mysql.md) -- [Using MySQL as a source of external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql) +- [Using MySQL as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql) diff --git a/docs/en/sql-reference/table-functions/odbc.md b/docs/en/sql-reference/table-functions/odbc.md index f8c46fe44d8..7e13424bc8a 100644 --- a/docs/en/sql-reference/table-functions/odbc.md +++ b/docs/en/sql-reference/table-functions/odbc.md @@ -101,5 +101,5 @@ SELECT * FROM odbc('DSN=mysqlconn', 'test', 'test') ## See Also -- [ODBC external dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-odbc) +- [ODBC dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-odbc) - [ODBC table engine](../../engines/table-engines/integrations/odbc.md). 
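The documentation pages renamed above all describe the same pattern: instead of joining against a dimension table or querying the external system directly, you define a dictionary over the source (MySQL, PostgreSQL, ODBC, or ClickHouse itself) and look values up with `dictGet`. As a reading aid only, here is a minimal sketch of that pattern in ClickHouse SQL; the table name, dictionary name, layout and lifetime values are illustrative and not part of this patch:

```sql
-- Hypothetical dimension table used as the dictionary source.
CREATE TABLE campaign_names (id UInt64, name String) ENGINE = MergeTree ORDER BY id;

-- Dictionary over that table; the FLAT layout and lifetime are example choices.
CREATE DICTIONARY campaign_dict (id UInt64, name String)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'campaign_names'))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 600);

-- Point lookup via the dictionary instead of a JOIN against the dimension table.
SELECT dictGet('campaign_dict', 'name', toUInt64(42));
```
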
diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md index 367edbe9a00..e98869de739 100644 --- a/docs/en/sql-reference/table-functions/postgresql.md +++ b/docs/en/sql-reference/table-functions/postgresql.md @@ -130,6 +130,6 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32) **See Also** - [The PostgreSQL table engine](../../engines/table-engines/integrations/postgresql.md) -- [Using PostgreSQL as a source of external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql) +- [Using PostgreSQL as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql) [Original article](https://clickhouse.com/docs/en/sql-reference/table-functions/postgresql/) From 2daec0b45e781366178a857319990898dd8ef14e Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 7 Nov 2022 18:05:40 +0000 Subject: [PATCH 11/80] S3 request per second rate throttling + refactoring --- src/Backups/BackupIO_S3.cpp | 9 ++-- src/Backups/BackupIO_S3.h | 3 +- src/Common/ProfileEvents.cpp | 2 +- src/Common/Throttler.cpp | 2 - src/Common/Throttler.h | 2 + src/Coordination/KeeperSnapshotManagerS3.cpp | 6 ++- src/Core/Settings.h | 4 ++ .../ObjectStorages/S3/S3ObjectStorage.cpp | 15 ++++--- src/Disks/ObjectStorages/S3/diskSettings.cpp | 34 +++++++++++---- src/Disks/ObjectStorages/S3/diskSettings.h | 2 +- src/Disks/ObjectStorages/S3/parseConfig.h | 27 ------------ .../ObjectStorages/S3/registerDiskS3.cpp | 13 ++---- src/IO/ReadBufferFromS3.cpp | 10 ++--- src/IO/ReadBufferFromS3.h | 11 ++--- src/IO/S3/PocoHTTPClient.cpp | 26 +++++++++++- src/IO/S3/PocoHTTPClient.h | 17 +++++++- src/IO/S3/tests/gtest_aws_s3_client.cpp | 8 +++- src/IO/S3Common.cpp | 31 ++++++++++++-- src/IO/S3Common.h | 5 ++- src/Storages/StorageS3.cpp | 42 ++++++++++--------- src/Storages/StorageS3.h | 7 ++-- src/Storages/StorageS3Settings.cpp | 21 ++++++++++ src/Storages/StorageS3Settings.h | 7 +++- 23 files changed, 198 insertions(+), 106 deletions(-) delete mode 100644 src/Disks/ObjectStorages/S3/parseConfig.h diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 12038a8a30c..3f723c98a2a 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -46,7 +46,7 @@ namespace context->getRemoteHostFilter(), static_cast(context->getGlobalContext()->getSettingsRef().s3_max_redirects), context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, - /* for_disk_s3 = */ false); + /* for_disk_s3 = */ false, /* get_request_throttler = */ {}, /* put_request_throttler = */ {}); client_configuration.endpointOverride = s3_uri.endpoint; client_configuration.maxConnections = static_cast(context->getSettingsRef().s3_max_connections); @@ -86,8 +86,8 @@ BackupReaderS3::BackupReaderS3( const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_) : s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) - , max_single_read_retries(context_->getSettingsRef().s3_max_single_read_retries) , read_settings(context_->getReadSettings()) + , rw_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).rw_settings) { } @@ -115,7 +115,7 @@ UInt64 BackupReaderS3::getFileSize(const String & file_name) std::unique_ptr BackupReaderS3::readFile(const String & file_name) { return std::make_unique( - client, 
s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, max_single_read_retries, read_settings); + client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, rw_settings, read_settings); } @@ -123,7 +123,6 @@ BackupWriterS3::BackupWriterS3( const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_) : s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) - , max_single_read_retries(context_->getSettingsRef().s3_max_single_read_retries) , read_settings(context_->getReadSettings()) , rw_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).rw_settings) { @@ -318,7 +317,7 @@ bool BackupWriterS3::fileContentsEqual(const String & file_name, const String & try { auto in = std::make_unique( - client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, max_single_read_retries, read_settings); + client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, rw_settings, read_settings); String actual_file_contents(expected_file_contents.size(), ' '); return (in->read(actual_file_contents.data(), actual_file_contents.size()) == actual_file_contents.size()) && (actual_file_contents == expected_file_contents) && in->eof(); diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index 471ddcc06e6..17a365da396 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -39,8 +39,8 @@ public: private: S3::URI s3_uri; std::shared_ptr client; - UInt64 max_single_read_retries; ReadSettings read_settings; + S3Settings::ReadWriteSettings rw_settings; }; @@ -82,7 +82,6 @@ private: S3::URI s3_uri; std::shared_ptr client; - UInt64 max_single_read_retries; ReadSettings read_settings; S3Settings::ReadWriteSettings rw_settings; }; diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 2f801e496fa..a3ceecf8041 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -61,7 +61,7 @@ M(NetworkSendElapsedMicroseconds, "Total time spent waiting for data to send to network or sending data to network. Only ClickHouse-related network interaction is included, not by 3rd party libraries..") \ M(NetworkReceiveBytes, "Total number of bytes received from network. Only ClickHouse-related network interaction is included, not by 3rd party libraries.") \ M(NetworkSendBytes, "Total number of bytes send to network. Only ClickHouse-related network interaction is included, not by 3rd party libraries.") \ - M(ThrottlerSleepMicroseconds, "Total time a query was sleeping to conform the 'max_network_bandwidth' setting.") \ + M(ThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_network_bandwidth' and other throttling settings.") \ \ M(QueryMaskingRulesMatch, "Number of times query masking rules was successfully matched.") \ \ diff --git a/src/Common/Throttler.cpp b/src/Common/Throttler.cpp index b38777efc03..79625d4894c 100644 --- a/src/Common/Throttler.cpp +++ b/src/Common/Throttler.cpp @@ -20,8 +20,6 @@ namespace ErrorCodes /// Just 10^9. 
static constexpr auto NS = 1000000000UL; -static const size_t default_burst_seconds = 1; - Throttler::Throttler(size_t max_speed_, const std::shared_ptr & parent_) : max_speed(max_speed_) , max_burst(max_speed_ * default_burst_seconds) diff --git a/src/Common/Throttler.h b/src/Common/Throttler.h index 9b6eff13506..708e9b939fa 100644 --- a/src/Common/Throttler.h +++ b/src/Common/Throttler.h @@ -17,6 +17,8 @@ namespace DB class Throttler { public: + static const size_t default_burst_seconds = 1; + Throttler(size_t max_speed_, size_t max_burst_, const std::shared_ptr & parent_ = nullptr) : max_speed(max_speed_), max_burst(max_burst_), limit_exceeded_exception_message(""), tokens(max_burst), parent(parent_) {} diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 2e19d496407..8777ef341ff 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -93,7 +93,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo auth_settings.region, RemoteHostFilter(), s3_max_redirects, enable_s3_requests_logging, - /* for_disk_s3 = */ false); + /* for_disk_s3 = */ false, /* get_request_throttler = */ {}, /* put_request_throttler = */ {}); client_configuration.endpointOverride = new_uri.endpoint; @@ -194,13 +194,15 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa lock_writer.finalize(); // We read back the written UUID, if it's the same we can upload the file + S3Settings::ReadWriteSettings rw_settings; + rw_settings.max_single_read_retries = 1; ReadBufferFromS3 lock_reader { s3_client->client, s3_client->uri.bucket, lock_file, "", - 1, + rw_settings, {} }; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 5c6ca1a1d57..e36ff1985fb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -90,6 +90,10 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ + M(UInt64, s3_max_get_rps, 0, "Limit on S3 GET request per second rate. Zero means unlimited.", 0) \ + M(UInt64, s3_max_get_burst, 0, "Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_get_rps`", 0) \ + M(UInt64, s3_max_put_rps, 0, "Limit on S3 PUT request per second rate. Zero means unlimited.", 0) \ + M(UInt64, s3_max_put_burst, 0, "Max number of requests that can be issued simultaneously before hitting request per second limit. 
By default (0) equals to `s3_max_put_rps`", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 0c421ee03d7..21b7ff4f0f1 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -175,7 +175,7 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT bucket, path, version_id, - settings_ptr->s3_settings.max_single_read_retries, + settings_ptr->s3_settings, disk_read_settings, /* use_external_buffer */true, /* offset */0, @@ -212,7 +212,7 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT bucket, object.absolute_path, version_id, - settings_ptr->s3_settings.max_single_read_retries, + settings_ptr->s3_settings, patchSettings(read_settings)); } @@ -627,17 +627,20 @@ void S3ObjectStorage::startup() void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { - s3_settings.set(getSettings(config, config_prefix, context)); - client.set(getClient(config, config_prefix, context)); + auto new_s3_settings = getSettings(config, config_prefix, context); + auto new_client = getClient(config, config_prefix, context, *new_s3_settings); + s3_settings.set(std::move(new_s3_settings)); + client.set(std::move(new_client)); applyRemoteThrottlingSettings(context); } std::unique_ptr S3ObjectStorage::cloneObjectStorage( const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { + auto new_s3_settings = getSettings(config, config_prefix, context); + auto new_client = getClient(config, config_prefix, context, *new_s3_settings); return std::make_unique( - getClient(config, config_prefix, context), - getSettings(config, config_prefix, context), + std::move(new_client), std::move(new_s3_settings), version_id, s3_capabilities, new_namespace, S3::URI(Poco::URI(config.getString(config_prefix + ".endpoint"))).endpoint); } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index e61987163d2..707675c01fd 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -32,14 +33,23 @@ namespace ErrorCodes std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { + const Settings & settings = context->getSettingsRef(); S3Settings::ReadWriteSettings rw_settings; - rw_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", context->getSettingsRef().s3_max_single_read_retries); - rw_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", context->getSettingsRef().s3_min_upload_part_size); - rw_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", context->getSettingsRef().s3_upload_part_size_multiply_factor); - rw_settings.upload_part_size_multiply_parts_count_threshold = 
config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold); - rw_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", context->getSettingsRef().s3_max_single_part_upload_size); - rw_settings.check_objects_after_upload = config.getUInt64(config_prefix + ".s3_check_objects_after_upload", context->getSettingsRef().s3_check_objects_after_upload); - rw_settings.max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".s3_max_unexpected_write_error_retries", context->getSettingsRef().s3_max_unexpected_write_error_retries); + rw_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", settings.s3_max_single_read_retries); + rw_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", settings.s3_min_upload_part_size); + rw_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", settings.s3_upload_part_size_multiply_factor); + rw_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", settings.s3_upload_part_size_multiply_parts_count_threshold); + rw_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", settings.s3_max_single_part_upload_size); + rw_settings.check_objects_after_upload = config.getUInt64(config_prefix + ".s3_check_objects_after_upload", settings.s3_check_objects_after_upload); + rw_settings.max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".s3_max_unexpected_write_error_retries", settings.s3_max_unexpected_write_error_retries); + + // NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. + if (UInt64 max_get_rps = config.getUInt64(config_prefix + ".s3_max_get_rps", settings.s3_max_get_rps)) + rw_settings.get_request_throttler = std::make_shared( + max_get_rps, config.getUInt64(config_prefix + ".s3_max_get_burst", settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * max_get_rps)); + if (UInt64 max_put_rps = config.getUInt64(config_prefix + ".s3_max_put_rps", settings.s3_max_put_rps)) + rw_settings.put_request_throttler = std::make_shared( + max_put_rps, config.getUInt64(config_prefix + ".s3_max_put_burst", settings.s3_max_put_burst ? 
settings.s3_max_put_burst : Throttler::default_burst_seconds * max_put_rps)); return std::make_unique( rw_settings, @@ -112,14 +122,20 @@ std::shared_ptr getProxyConfiguration(const String & pre } -std::unique_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +std::unique_ptr getClient( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context, + const S3ObjectStorageSettings & settings) { S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( config.getString(config_prefix + ".region", ""), context->getRemoteHostFilter(), static_cast(context->getGlobalContext()->getSettingsRef().s3_max_redirects), context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, - /* for_disk_s3 = */ true); + /* for_disk_s3 = */ true, + settings.s3_settings.get_request_throttler, + settings.s3_settings.put_request_throttler); S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); if (uri.key.back() != '/') diff --git a/src/Disks/ObjectStorages/S3/diskSettings.h b/src/Disks/ObjectStorages/S3/diskSettings.h index 05ba8819f83..04eb7aced8e 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.h +++ b/src/Disks/ObjectStorages/S3/diskSettings.h @@ -22,7 +22,7 @@ struct S3ObjectStorageSettings; std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); -std::unique_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); +std::unique_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, const S3ObjectStorageSettings & settings); } diff --git a/src/Disks/ObjectStorages/S3/parseConfig.h b/src/Disks/ObjectStorages/S3/parseConfig.h deleted file mode 100644 index 1defc673c2e..00000000000 --- a/src/Disks/ObjectStorages/S3/parseConfig.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - - -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); - -std::shared_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); - - -} diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index e09aef22122..e73accbb956 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -130,21 +130,16 @@ void registerDiskS3(DiskFactory & factory) chassert(type == "s3" || type == "s3_plain"); MetadataStoragePtr metadata_storage; + auto settings = getSettings(config, config_prefix, context); + auto client = getClient(config, config_prefix, context, *settings); if (type == "s3_plain") { - s3_storage = std::make_shared( - getClient(config, config_prefix, context), - getSettings(config, config_prefix, context), - uri.version_id, s3_capabilities, uri.bucket, uri.endpoint); + s3_storage = std::make_shared(std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint); metadata_storage = std::make_shared(s3_storage, uri.key); } else { - s3_storage = std::make_shared( - getClient(config, config_prefix, context), - getSettings(config, config_prefix, 
context), - uri.version_id, s3_capabilities, uri.bucket, uri.endpoint); - + s3_storage = std::make_shared(std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint); auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); metadata_storage = std::make_shared(metadata_disk, uri.key); } diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index c49941b025d..fa748469ca0 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -45,7 +45,7 @@ ReadBufferFromS3::ReadBufferFromS3( const String & bucket_, const String & key_, const String & version_id_, - UInt64 max_single_read_retries_, + const S3Settings::ReadWriteSettings & s3_settings_, const ReadSettings & settings_, bool use_external_buffer_, size_t offset_, @@ -56,7 +56,7 @@ ReadBufferFromS3::ReadBufferFromS3( , bucket(bucket_) , key(key_) , version_id(version_id_) - , max_single_read_retries(max_single_read_retries_) + , s3_settings(s3_settings_) , offset(offset_) , read_until_position(read_until_position_) , read_settings(settings_) @@ -105,7 +105,7 @@ bool ReadBufferFromS3::nextImpl() } size_t sleep_time_with_backoff_milliseconds = 100; - for (size_t attempt = 0; (attempt < max_single_read_retries) && !next_result; ++attempt) + for (size_t attempt = 0; attempt < s3_settings.max_single_read_retries && !next_result; ++attempt) { Stopwatch watch; try @@ -166,7 +166,7 @@ bool ReadBufferFromS3::nextImpl() attempt, e.message()); - if (attempt + 1 == max_single_read_retries) + if (attempt + 1 == s3_settings.max_single_read_retries) throw; /// Pause before next attempt. @@ -349,7 +349,7 @@ SeekableReadBufferPtr ReadBufferS3Factory::getReader() bucket, key, version_id, - s3_max_single_read_retries, + s3_settings, read_settings, false /*use_external_buffer*/, next_range->first, diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index cc836bba495..3e1a26f7713 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "config.h" #if USE_AWS_S3 @@ -33,7 +34,7 @@ private: String bucket; String key; String version_id; - UInt64 max_single_read_retries; + const S3Settings::ReadWriteSettings s3_settings; /// These variables are atomic because they can be used for `logging only` /// (where it is not important to get consistent result) @@ -52,7 +53,7 @@ public: const String & bucket_, const String & key_, const String & version_id_, - UInt64 max_single_read_retries_, + const S3Settings::ReadWriteSettings & s3_settings_, const ReadSettings & settings_, bool use_external_buffer = false, size_t offset_ = 0, @@ -100,7 +101,7 @@ public: const String & version_id_, size_t range_step_, size_t object_size_, - UInt64 s3_max_single_read_retries_, + const S3Settings::ReadWriteSettings & s3_settings_, const ReadSettings & read_settings_) : client_ptr(client_ptr_) , bucket(bucket_) @@ -110,7 +111,7 @@ public: , range_generator(object_size_, range_step_) , range_step(range_step_) , object_size(object_size_) - , s3_max_single_read_retries(s3_max_single_read_retries_) + , s3_settings(s3_settings_) { assert(range_step > 0); assert(range_step < object_size); @@ -135,7 +136,7 @@ private: size_t range_step; size_t object_size; - UInt64 s3_max_single_read_retries; + const S3Settings::ReadWriteSettings s3_settings; }; } diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 7d053bebe4a..33dd3250c9f 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ 
b/src/IO/S3/PocoHTTPClient.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -76,12 +77,16 @@ PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, bool enable_s3_requests_logging_, - bool for_disk_s3_) + bool for_disk_s3_, + const ThrottlerPtr & get_request_throttler_, + const ThrottlerPtr & put_request_throttler_) : force_region(force_region_) , remote_host_filter(remote_host_filter_) , s3_max_redirects(s3_max_redirects_) , enable_s3_requests_logging(enable_s3_requests_logging_) , for_disk_s3(for_disk_s3_) + , get_request_throttler(get_request_throttler_) + , put_request_throttler(put_request_throttler_) { } @@ -128,6 +133,8 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config , s3_max_redirects(client_configuration.s3_max_redirects) , enable_s3_requests_logging(client_configuration.enable_s3_requests_logging) , for_disk_s3(client_configuration.for_disk_s3) + , get_request_throttler(client_configuration.get_request_throttler) + , put_request_throttler(client_configuration.put_request_throttler) , extra_headers(client_configuration.extra_headers) { } @@ -245,6 +252,23 @@ void PocoHTTPClient::makeRequestInternal( if (enable_s3_requests_logging) LOG_TEST(log, "Make request to: {}", uri); + switch (request.GetMethod()) + { + case Aws::Http::HttpMethod::HTTP_GET: + case Aws::Http::HttpMethod::HTTP_HEAD: + if (get_request_throttler) + get_request_throttler->add(1); + break; + case Aws::Http::HttpMethod::HTTP_PUT: + case Aws::Http::HttpMethod::HTTP_POST: + case Aws::Http::HttpMethod::HTTP_PATCH: + if (put_request_throttler) + put_request_throttler->add(1); + break; + case Aws::Http::HttpMethod::HTTP_DELETE: + break; // Not throttled + } + addMetric(request, S3MetricType::Count); CurrentMetrics::Increment metric_increment{CurrentMetrics::S3Requests}; diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 5649638285d..ed6e1793c2c 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -8,6 +8,7 @@ #if USE_AWS_S3 #include +#include #include #include #include @@ -48,6 +49,8 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration unsigned int s3_max_redirects; bool enable_s3_requests_logging; bool for_disk_s3; + ThrottlerPtr get_request_throttler; + ThrottlerPtr put_request_throttler; HeaderCollection extra_headers; void updateSchemeAndRegion(); @@ -60,7 +63,9 @@ private: const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, bool enable_s3_requests_logging_, - bool for_disk_s3_ + bool for_disk_s3_, + const ThrottlerPtr & get_request_throttler_, + const ThrottlerPtr & put_request_throttler_ ); /// Constructor of Aws::Client::ClientConfiguration must be called after AWS SDK initialization. @@ -154,6 +159,16 @@ private: unsigned int s3_max_redirects; bool enable_s3_requests_logging; bool for_disk_s3; + + /// Limits get request per second rate for GET, SELECT and all other requests, excluding throttled by put throttler + /// (i.e. throttles GetObject, HeadObject) + ThrottlerPtr get_request_throttler; + + /// Limits put request per second rate for PUT, COPY, POST, LIST requests + /// (i.e. 
throttles PutObject, CopyObject, ListObjects, CreateMultipartUpload, UploadPartCopy, UploadPart, CompleteMultipartUpload) + /// NOTE: DELETE and CANCEL requests are not throttled by either put or get throttler + ThrottlerPtr put_request_throttler; + const HeaderCollection extra_headers; }; diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index 9b2a65d84fc..96a7c8b3dfb 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -88,7 +88,9 @@ TEST(IOTestAwsS3Client, AppendExtraSSECHeaders) remote_host_filter, s3_max_redirects, enable_s3_requests_logging, - /* for_disk_s3 = */ false + /* for_disk_s3 = */ false, + /* get_request_throttler = */ {}, + /* put_request_throttler = */ {} ); client_configuration.endpointOverride = uri.endpoint; @@ -113,12 +115,14 @@ TEST(IOTestAwsS3Client, AppendExtraSSECHeaders) ASSERT_TRUE(client); DB::ReadSettings read_settings; + DB::S3Settings::ReadWriteSettings rw_settings; + rw_settings.max_single_read_retries = max_single_read_retries; DB::ReadBufferFromS3 read_buffer( client, uri.bucket, uri.key, version_id, - max_single_read_retries, + rw_settings, read_settings ); diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 859f5ce796b..5af09275df4 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -573,7 +573,14 @@ public: /// AWS API tries credentials providers one by one. Some of providers (like ProfileConfigFileAWSCredentialsProvider) can be /// quite verbose even if nobody configured them. So we use our provider first and only after it use default providers. { - DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging, configuration.for_disk_s3); + DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration( + configuration.region, + configuration.remote_host_filter, + configuration.s3_max_redirects, + configuration.enable_s3_requests_logging, + configuration.for_disk_s3, + configuration.get_request_throttler, + configuration.put_request_throttler); AddProvider(std::make_shared(aws_client_configuration)); } @@ -610,7 +617,14 @@ public: } else if (Aws::Utils::StringUtils::ToLower(ec2_metadata_disabled.c_str()) != "true") { - DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging, configuration.for_disk_s3); + DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration( + configuration.region, + configuration.remote_host_filter, + configuration.s3_max_redirects, + configuration.enable_s3_requests_logging, + configuration.for_disk_s3, + configuration.get_request_throttler, + configuration.put_request_throttler); /// See MakeDefaultHttpResourceClientConfiguration(). 
/// This is part of EC2 metadata client, but unfortunately it can't be accessed from outside @@ -731,9 +745,18 @@ namespace S3 const RemoteHostFilter & remote_host_filter, unsigned int s3_max_redirects, bool enable_s3_requests_logging, - bool for_disk_s3) + bool for_disk_s3, + const ThrottlerPtr & get_request_throttler, + const ThrottlerPtr & put_request_throttler) { - return PocoHTTPClientConfiguration(force_region, remote_host_filter, s3_max_redirects, enable_s3_requests_logging, for_disk_s3); + return PocoHTTPClientConfiguration( + force_region, + remote_host_filter, + s3_max_redirects, + enable_s3_requests_logging, + for_disk_s3, + get_request_throttler, + put_request_throttler); } URI::URI(const Poco::URI & uri_) diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 93e5eb78c7f..01a6b8d5d82 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -17,6 +17,7 @@ #include #include +#include namespace Aws::S3 { @@ -88,7 +89,9 @@ public: const RemoteHostFilter & remote_host_filter, unsigned int s3_max_redirects, bool enable_s3_requests_logging, - bool for_disk_s3); + bool for_disk_s3, + const ThrottlerPtr & get_request_throttler, + const ThrottlerPtr & put_request_throttler); private: ClientFactory(); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 9bbccf5f582..9309e3f7384 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -100,7 +100,8 @@ public: const Block & virtual_header_, ContextPtr context_, std::unordered_map * object_infos_, - Strings * read_keys_) + Strings * read_keys_, + const S3Settings::ReadWriteSettings & rw_settings_) : WithContext(context_) , client(client_) , globbed_uri(globbed_uri_) @@ -108,6 +109,7 @@ public: , virtual_header(virtual_header_) , object_infos(object_infos_) , read_keys(read_keys_) + , rw_settings(rw_settings_) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) throw Exception("Expression can not have wildcards inside bucket name", ErrorCodes::UNEXPECTED_EXPRESSION); @@ -258,6 +260,7 @@ private: bool is_finished{false}; std::unordered_map * object_infos; Strings * read_keys; + S3Settings::ReadWriteSettings rw_settings; }; StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( @@ -267,8 +270,9 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( const Block & virtual_header, ContextPtr context, std::unordered_map * object_infos_, - Strings * read_keys_) - : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, object_infos_, read_keys_)) + Strings * read_keys_, + const S3Settings::ReadWriteSettings & rw_settings_) + : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, object_infos_, read_keys_, rw_settings_)) { } @@ -381,7 +385,7 @@ StorageS3Source::StorageS3Source( std::optional format_settings_, const ColumnsDescription & columns_, UInt64 max_block_size_, - UInt64 max_single_read_retries_, + const S3Settings::ReadWriteSettings & rw_settings_, String compression_hint_, const std::shared_ptr & client_, const String & bucket_, @@ -397,7 +401,7 @@ StorageS3Source::StorageS3Source( , format(format_) , columns_desc(columns_) , max_block_size(max_block_size_) - , max_single_read_retries(max_single_read_retries_) + , rw_settings(rw_settings_) , compression_hint(std::move(compression_hint_)) , client(client_) , sample_block(sample_block_) @@ -463,7 +467,7 @@ std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & k if (!use_parallel_download || object_too_small) { LOG_TRACE(log, "Downloading object of 
size {} from S3 in single thread", object_size); - return std::make_unique(client, bucket, key, version_id, max_single_read_retries, getContext()->getReadSettings()); + return std::make_unique(client, bucket, key, version_id, rw_settings, getContext()->getReadSettings()); } assert(object_size > 0); @@ -475,7 +479,7 @@ std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & k } auto factory = std::make_unique( - client, bucket, key, version_id, download_buffer_size, object_size, max_single_read_retries, getContext()->getReadSettings()); + client, bucket, key, version_id, download_buffer_size, object_size, rw_settings, getContext()->getReadSettings()); LOG_TRACE( log, "Downloading from S3 in {} threads. Object size: {}, Range size: {}.", download_thread_num, object_size, download_buffer_size); @@ -815,7 +819,7 @@ std::shared_ptr StorageS3::createFileIterator( { /// Iterate through disclosed globs and make a source for each file auto glob_iterator = std::make_shared( - *s3_configuration.client, s3_configuration.uri, query, virtual_block, local_context, object_infos, read_keys); + *s3_configuration.client, s3_configuration.uri, query, virtual_block, local_context, object_infos, read_keys, s3_configuration.rw_settings); return std::make_shared([glob_iterator]() { return glob_iterator->next(); }); } else @@ -905,7 +909,7 @@ Pipe StorageS3::read( format_settings, columns_description, max_block_size, - s3_configuration.rw_settings.max_single_read_retries, + s3_configuration.rw_settings, compression_method, s3_configuration.client, s3_configuration.uri.bucket, @@ -1022,9 +1026,7 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration & upd) { auto settings = ctx->getStorageS3Settings().getSettings(upd.uri.uri.toString()); - const auto & config_rw_settings = settings.rw_settings; - - if (upd.rw_settings != config_rw_settings) + if (upd.rw_settings != settings.rw_settings) upd.rw_settings = settings.rw_settings; upd.rw_settings.updateFromSettingsIfEmpty(ctx->getSettings()); @@ -1045,7 +1047,9 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration ctx->getRemoteHostFilter(), static_cast(ctx->getGlobalContext()->getSettingsRef().s3_max_redirects), ctx->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, - /* for_disk_s3 = */ false); + /* for_disk_s3 = */ false, + upd.rw_settings.get_request_throttler, + upd.rw_settings.put_request_throttler); client_configuration.endpointOverride = upd.uri.endpoint; client_configuration.maxConnections = static_cast(upd.rw_settings.max_connections); @@ -1082,15 +1086,15 @@ void StorageS3::processNamedCollectionResult(StorageS3Configuration & configurat else if (arg_name == "max_single_read_retries") configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_single_read_retries"); else if (arg_name == "min_upload_part_size") - configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "min_upload_part_size"); + configuration.rw_settings.min_upload_part_size = checkAndGetLiteralArgument(arg_value, "min_upload_part_size"); else if (arg_name == "upload_part_size_multiply_factor") - configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_factor"); + configuration.rw_settings.upload_part_size_multiply_factor = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_factor"); else if 
(arg_name == "upload_part_size_multiply_parts_count_threshold") - configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_parts_count_threshold"); + configuration.rw_settings.upload_part_size_multiply_parts_count_threshold = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_parts_count_threshold"); else if (arg_name == "max_single_part_upload_size") - configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_single_part_upload_size"); + configuration.rw_settings.max_single_part_upload_size = checkAndGetLiteralArgument(arg_value, "max_single_part_upload_size"); else if (arg_name == "max_connections") - configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_connections"); + configuration.rw_settings.max_connections = checkAndGetLiteralArgument(arg_value, "max_connections"); else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Unknown key-value argument `{}` for StorageS3, expected: url, [access_key_id, secret_access_key], name of used format and [compression_method].", @@ -1220,7 +1224,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( int zstd_window_log_max = static_cast(ctx->getSettingsRef().zstd_window_log_max); return wrapReadBufferWithCompressionMethod( std::make_unique( - s3_configuration.client, s3_configuration.uri.bucket, key, s3_configuration.uri.version_id, s3_configuration.rw_settings.max_single_read_retries, ctx->getReadSettings()), + s3_configuration.client, s3_configuration.uri.bucket, key, s3_configuration.uri.version_id, s3_configuration.rw_settings, ctx->getReadSettings()), chooseCompressionMethod(key, compression_method), zstd_window_log_max); }; diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 23947a32092..76391cb2695 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -43,7 +43,8 @@ public: const Block & virtual_header, ContextPtr context, std::unordered_map * object_infos = nullptr, - Strings * read_keys_ = nullptr); + Strings * read_keys_ = nullptr, + const S3Settings::ReadWriteSettings & rw_settings_ = {}); String next(); @@ -79,7 +80,7 @@ public: std::optional format_settings_, const ColumnsDescription & columns_, UInt64 max_block_size_, - UInt64 max_single_read_retries_, + const S3Settings::ReadWriteSettings & rw_settings_, String compression_hint_, const std::shared_ptr & client_, const String & bucket, @@ -102,7 +103,7 @@ private: String format; ColumnsDescription columns_desc; UInt64 max_block_size; - UInt64 max_single_read_retries; + S3Settings::ReadWriteSettings rw_settings; String compression_hint; std::shared_ptr client; Block sample_block; diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 65e9bb1ab8c..9e670c65e17 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -56,6 +57,14 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U rw_settings.max_connections = get_uint_for_key(key, "max_connections", true, settings.s3_max_connections); rw_settings.check_objects_after_upload = get_bool_for_key(key, "check_objects_after_upload", true, false); + // NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. 
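Besides the per-endpoint keys read here (`max_get_rps`, `max_get_burst`, `max_put_rps`, `max_put_burst`) and the per-disk keys in `diskSettings.cpp`, the patch adds matching query-level settings (`s3_max_get_rps`, `s3_max_get_burst`, `s3_max_put_rps`, `s3_max_put_burst`) in `Settings.h`; when a burst value is left at 0 it defaults to one second's worth of requests (`Throttler::default_burst_seconds * rps`). A sketch of per-query use, with a placeholder bucket URL and an illustrative schema rather than anything taken from the patch:

```sql
-- Illustrative only: throttle this query's PUT-class S3 requests to 10 per second
-- (burst defaults to 10 when s3_max_put_burst is left at 0).
INSERT INTO FUNCTION s3('https://example-bucket.s3.amazonaws.com/out.csv', 'CSV', 'a UInt64, b String')
SELECT number, toString(number) FROM numbers(1000000)
SETTINGS s3_max_put_rps = 10;

-- Throttle GET/HEAD-class requests while reading the data back.
SELECT count()
FROM s3('https://example-bucket.s3.amazonaws.com/out.csv', 'CSV', 'a UInt64, b String')
SETTINGS s3_max_get_rps = 100, s3_max_get_burst = 200;
```
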
+ if (UInt64 max_get_rps = get_uint_for_key(key, "max_get_rps", true, settings.s3_max_get_rps)) + rw_settings.get_request_throttler = std::make_shared( + max_get_rps, get_uint_for_key(key, "max_get_burst", true, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * max_get_rps)); + if (UInt64 max_put_rps = get_uint_for_key(key, "max_put_rps", true, settings.s3_max_put_rps)) + rw_settings.put_request_throttler = std::make_shared( + max_put_rps, get_uint_for_key(key, "max_put_burst", true, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * max_put_rps)); + s3_settings.emplace(endpoint, S3Settings{std::move(auth_settings), std::move(rw_settings)}); } } @@ -87,6 +96,12 @@ S3Settings::ReadWriteSettings::ReadWriteSettings(const Settings & settings) max_connections = settings.s3_max_connections; check_objects_after_upload = settings.s3_check_objects_after_upload; max_unexpected_write_error_retries = settings.s3_max_unexpected_write_error_retries; + if (settings.s3_max_get_rps) + get_request_throttler = std::make_shared( + settings.s3_max_get_rps, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * settings.s3_max_get_rps); + if (settings.s3_max_put_rps) + put_request_throttler = std::make_shared( + settings.s3_max_put_rps, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps); } void S3Settings::ReadWriteSettings::updateFromSettingsIfEmpty(const Settings & settings) @@ -106,6 +121,12 @@ void S3Settings::ReadWriteSettings::updateFromSettingsIfEmpty(const Settings & s if (!max_unexpected_write_error_retries) max_unexpected_write_error_retries = settings.s3_max_unexpected_write_error_retries; check_objects_after_upload = settings.s3_check_objects_after_upload; + if (!get_request_throttler && settings.s3_max_get_rps) + get_request_throttler = std::make_shared( + settings.s3_max_get_rps, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * settings.s3_max_get_rps); + if (!put_request_throttler && settings.s3_max_put_rps) + put_request_throttler = std::make_shared( + settings.s3_max_put_rps, settings.s3_max_put_burst ? 
settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps); } } diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 2da4a1d7590..40ba11a19ba 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -33,6 +34,8 @@ struct S3Settings size_t max_connections = 0; bool check_objects_after_upload = false; size_t max_unexpected_write_error_retries = 0; + ThrottlerPtr get_request_throttler; + ThrottlerPtr put_request_throttler; ReadWriteSettings() = default; explicit ReadWriteSettings(const Settings & settings); @@ -46,7 +49,9 @@ struct S3Settings && max_single_part_upload_size == other.max_single_part_upload_size && max_connections == other.max_connections && check_objects_after_upload == other.check_objects_after_upload - && max_unexpected_write_error_retries == other.max_unexpected_write_error_retries; + && max_unexpected_write_error_retries == other.max_unexpected_write_error_retries + && get_request_throttler == other.get_request_throttler + && put_request_throttler == other.put_request_throttler; } void updateFromSettingsIfEmpty(const Settings & settings); From 6d5d9ff421cbe7497c6c94f032eb2f9807fe2799 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 8 Nov 2022 13:48:23 +0000 Subject: [PATCH 12/80] rename ReadWriteSettings -> RequestSettings --- src/Backups/BackupIO_S3.cpp | 14 ++--- src/Backups/BackupIO_S3.h | 4 +- src/Coordination/KeeperSnapshotManagerS3.cpp | 12 ++--- .../ObjectStorages/S3/S3ObjectStorage.cpp | 12 ++--- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 6 +-- src/Disks/ObjectStorages/S3/diskSettings.cpp | 26 ++++----- src/IO/ReadBufferFromS3.cpp | 10 ++-- src/IO/ReadBufferFromS3.h | 10 ++-- src/IO/S3/tests/gtest_aws_s3_client.cpp | 6 +-- src/IO/WriteBufferFromS3.cpp | 18 +++---- src/IO/WriteBufferFromS3.h | 4 +- .../ExternalDataSourceConfiguration.h | 2 +- src/Storages/StorageS3.cpp | 54 +++++++++---------- src/Storages/StorageS3.h | 12 ++--- src/Storages/StorageS3Cluster.cpp | 2 +- src/Storages/StorageS3Settings.cpp | 26 ++++----- src/Storages/StorageS3Settings.h | 12 ++--- 17 files changed, 115 insertions(+), 115 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 3f723c98a2a..f794500980b 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -87,7 +87,7 @@ BackupReaderS3::BackupReaderS3( : s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) , read_settings(context_->getReadSettings()) - , rw_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).rw_settings) + , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) { } @@ -115,7 +115,7 @@ UInt64 BackupReaderS3::getFileSize(const String & file_name) std::unique_ptr BackupReaderS3::readFile(const String & file_name) { return std::make_unique( - client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, rw_settings, read_settings); + client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, request_settings, read_settings); } @@ -124,9 +124,9 @@ BackupWriterS3::BackupWriterS3( : s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) , read_settings(context_->getReadSettings()) - , rw_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).rw_settings) + , 
request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) { - rw_settings.updateFromSettingsIfEmpty(context_->getSettingsRef()); + request_settings.updateFromSettingsIfEmpty(context_->getSettingsRef()); } DataSourceDescription BackupWriterS3::getDataSourceDescription() const @@ -212,7 +212,7 @@ void BackupWriterS3::copyObjectMultipartImpl( std::vector part_tags; - size_t upload_part_size = rw_settings.min_upload_part_size; + size_t upload_part_size = request_settings.min_upload_part_size; for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size) { Aws::S3::Model::UploadPartCopyRequest part_request; @@ -317,7 +317,7 @@ bool BackupWriterS3::fileContentsEqual(const String & file_name, const String & try { auto in = std::make_unique( - client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, rw_settings, read_settings); + client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, request_settings, read_settings); String actual_file_contents(expected_file_contents.size(), ' '); return (in->read(actual_file_contents.data(), actual_file_contents.size()) == actual_file_contents.size()) && (actual_file_contents == expected_file_contents) && in->eof(); @@ -335,7 +335,7 @@ std::unique_ptr BackupWriterS3::writeFile(const String & file_name) client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, - rw_settings, + request_settings, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, threadPoolCallbackRunner(IOThreadPool::get(), "BackupWriterS3")); diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index 17a365da396..99d7558ac9e 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -40,7 +40,7 @@ private: S3::URI s3_uri; std::shared_ptr client; ReadSettings read_settings; - S3Settings::ReadWriteSettings rw_settings; + S3Settings::RequestSettings request_settings; }; @@ -83,7 +83,7 @@ private: S3::URI s3_uri; std::shared_ptr client; ReadSettings read_settings; - S3Settings::ReadWriteSettings rw_settings; + S3Settings::RequestSettings request_settings; }; } diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 8777ef341ff..cc837f5f496 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -135,8 +135,8 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa if (s3_client == nullptr) return; - S3Settings::ReadWriteSettings read_write_settings; - read_write_settings.upload_part_size_multiply_parts_count_threshold = 10000; + S3Settings::RequestSettings request_settings_1; + request_settings_1.upload_part_size_multiply_parts_count_threshold = 10000; const auto create_writer = [&](const auto & key) { @@ -145,7 +145,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa s3_client->client, s3_client->uri.bucket, key, - read_write_settings + request_settings_1 }; }; @@ -194,15 +194,15 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa lock_writer.finalize(); // We read back the written UUID, if it's the same we can upload the file - S3Settings::ReadWriteSettings rw_settings; - rw_settings.max_single_read_retries = 1; + S3Settings::RequestSettings request_settings_2; + request_settings_2.max_single_read_retries = 1; ReadBufferFromS3 lock_reader { s3_client->client, s3_client->uri.bucket, lock_file, "", - rw_settings, + request_settings_2, {} }; diff --git 
a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 21b7ff4f0f1..bbb5c4cddc5 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -175,7 +175,7 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT bucket, path, version_id, - settings_ptr->s3_settings, + settings_ptr->request_settings, disk_read_settings, /* use_external_buffer */true, /* offset */0, @@ -212,7 +212,7 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT bucket, object.absolute_path, version_id, - settings_ptr->s3_settings, + settings_ptr->request_settings, patchSettings(read_settings)); } @@ -238,7 +238,7 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN client.get(), bucket, object.absolute_path, - settings_ptr->s3_settings, + settings_ptr->request_settings, attributes, buf_size, std::move(scheduler), @@ -473,7 +473,7 @@ void S3ObjectStorage::copyObjectImpl( throwIfError(outcome); auto settings_ptr = s3_settings.get(); - if (settings_ptr->s3_settings.check_objects_after_upload) + if (settings_ptr->request_settings.check_objects_after_upload) { auto object_head = requestObjectHeadData(dst_bucket, dst_key); if (!object_head.IsSuccess()) @@ -517,7 +517,7 @@ void S3ObjectStorage::copyObjectMultipartImpl( std::vector part_tags; - size_t upload_part_size = settings_ptr->s3_settings.min_upload_part_size; + size_t upload_part_size = settings_ptr->request_settings.min_upload_part_size; for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size) { ProfileEvents::increment(ProfileEvents::S3UploadPartCopy); @@ -570,7 +570,7 @@ void S3ObjectStorage::copyObjectMultipartImpl( throwIfError(outcome); } - if (settings_ptr->s3_settings.check_objects_after_upload) + if (settings_ptr->request_settings.check_objects_after_upload) { auto object_head = requestObjectHeadData(dst_bucket, dst_key); if (!object_head.IsSuccess()) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 6b1e8289b15..a737d3bc114 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -23,17 +23,17 @@ struct S3ObjectStorageSettings S3ObjectStorageSettings() = default; S3ObjectStorageSettings( - const S3Settings::ReadWriteSettings & s3_settings_, + const S3Settings::RequestSettings & request_settings_, uint64_t min_bytes_for_seek_, int32_t list_object_keys_size_, int32_t objects_chunk_size_to_delete_) - : s3_settings(s3_settings_) + : request_settings(request_settings_) , min_bytes_for_seek(min_bytes_for_seek_) , list_object_keys_size(list_object_keys_size_) , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) {} - S3Settings::ReadWriteSettings s3_settings; + S3Settings::RequestSettings request_settings; uint64_t min_bytes_for_seek; int32_t list_object_keys_size; diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 707675c01fd..ca2e9d04926 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -34,25 +34,25 @@ namespace ErrorCodes std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { const Settings & settings = context->getSettingsRef(); - S3Settings::ReadWriteSettings rw_settings; - rw_settings.max_single_read_retries = config.getUInt64(config_prefix + 
".s3_max_single_read_retries", settings.s3_max_single_read_retries); - rw_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", settings.s3_min_upload_part_size); - rw_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", settings.s3_upload_part_size_multiply_factor); - rw_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", settings.s3_upload_part_size_multiply_parts_count_threshold); - rw_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", settings.s3_max_single_part_upload_size); - rw_settings.check_objects_after_upload = config.getUInt64(config_prefix + ".s3_check_objects_after_upload", settings.s3_check_objects_after_upload); - rw_settings.max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".s3_max_unexpected_write_error_retries", settings.s3_max_unexpected_write_error_retries); + S3Settings::RequestSettings request_settings; + request_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", settings.s3_max_single_read_retries); + request_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", settings.s3_min_upload_part_size); + request_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", settings.s3_upload_part_size_multiply_factor); + request_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", settings.s3_upload_part_size_multiply_parts_count_threshold); + request_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", settings.s3_max_single_part_upload_size); + request_settings.check_objects_after_upload = config.getUInt64(config_prefix + ".s3_check_objects_after_upload", settings.s3_check_objects_after_upload); + request_settings.max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".s3_max_unexpected_write_error_retries", settings.s3_max_unexpected_write_error_retries); // NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. if (UInt64 max_get_rps = config.getUInt64(config_prefix + ".s3_max_get_rps", settings.s3_max_get_rps)) - rw_settings.get_request_throttler = std::make_shared( + request_settings.get_request_throttler = std::make_shared( max_get_rps, config.getUInt64(config_prefix + ".s3_max_get_burst", settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * max_get_rps)); if (UInt64 max_put_rps = config.getUInt64(config_prefix + ".s3_max_put_rps", settings.s3_max_put_rps)) - rw_settings.put_request_throttler = std::make_shared( + request_settings.put_request_throttler = std::make_shared( max_put_rps, config.getUInt64(config_prefix + ".s3_max_put_burst", settings.s3_max_put_burst ? 
settings.s3_max_put_burst : Throttler::default_burst_seconds * max_put_rps)); return std::make_unique( - rw_settings, + request_settings, config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getInt(config_prefix + ".list_object_keys_size", 1000), config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000)); @@ -134,8 +134,8 @@ std::unique_ptr getClient( static_cast(context->getGlobalContext()->getSettingsRef().s3_max_redirects), context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, /* for_disk_s3 = */ true, - settings.s3_settings.get_request_throttler, - settings.s3_settings.put_request_throttler); + settings.request_settings.get_request_throttler, + settings.request_settings.put_request_throttler); S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); if (uri.key.back() != '/') diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index fa748469ca0..c14fbecf223 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -45,7 +45,7 @@ ReadBufferFromS3::ReadBufferFromS3( const String & bucket_, const String & key_, const String & version_id_, - const S3Settings::ReadWriteSettings & s3_settings_, + const S3Settings::RequestSettings & request_settings_, const ReadSettings & settings_, bool use_external_buffer_, size_t offset_, @@ -56,7 +56,7 @@ ReadBufferFromS3::ReadBufferFromS3( , bucket(bucket_) , key(key_) , version_id(version_id_) - , s3_settings(s3_settings_) + , request_settings(request_settings_) , offset(offset_) , read_until_position(read_until_position_) , read_settings(settings_) @@ -105,7 +105,7 @@ bool ReadBufferFromS3::nextImpl() } size_t sleep_time_with_backoff_milliseconds = 100; - for (size_t attempt = 0; attempt < s3_settings.max_single_read_retries && !next_result; ++attempt) + for (size_t attempt = 0; attempt < request_settings.max_single_read_retries && !next_result; ++attempt) { Stopwatch watch; try @@ -166,7 +166,7 @@ bool ReadBufferFromS3::nextImpl() attempt, e.message()); - if (attempt + 1 == s3_settings.max_single_read_retries) + if (attempt + 1 == request_settings.max_single_read_retries) throw; /// Pause before next attempt. 
@@ -349,7 +349,7 @@ SeekableReadBufferPtr ReadBufferS3Factory::getReader() bucket, key, version_id, - s3_settings, + request_settings, read_settings, false /*use_external_buffer*/, next_range->first, diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 3e1a26f7713..17b13bf7d62 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -34,7 +34,7 @@ private: String bucket; String key; String version_id; - const S3Settings::ReadWriteSettings s3_settings; + const S3Settings::RequestSettings request_settings; /// These variables are atomic because they can be used for `logging only` /// (where it is not important to get consistent result) @@ -53,7 +53,7 @@ public: const String & bucket_, const String & key_, const String & version_id_, - const S3Settings::ReadWriteSettings & s3_settings_, + const S3Settings::RequestSettings & request_settings_, const ReadSettings & settings_, bool use_external_buffer = false, size_t offset_ = 0, @@ -101,7 +101,7 @@ public: const String & version_id_, size_t range_step_, size_t object_size_, - const S3Settings::ReadWriteSettings & s3_settings_, + const S3Settings::RequestSettings & request_settings_, const ReadSettings & read_settings_) : client_ptr(client_ptr_) , bucket(bucket_) @@ -111,7 +111,7 @@ public: , range_generator(object_size_, range_step_) , range_step(range_step_) , object_size(object_size_) - , s3_settings(s3_settings_) + , request_settings(request_settings_) { assert(range_step > 0); assert(range_step < object_size); @@ -136,7 +136,7 @@ private: size_t range_step; size_t object_size; - const S3Settings::ReadWriteSettings s3_settings; + const S3Settings::RequestSettings request_settings; }; } diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index 96a7c8b3dfb..2d298ca5df2 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -115,14 +115,14 @@ TEST(IOTestAwsS3Client, AppendExtraSSECHeaders) ASSERT_TRUE(client); DB::ReadSettings read_settings; - DB::S3Settings::ReadWriteSettings rw_settings; - rw_settings.max_single_read_retries = max_single_read_retries; + DB::S3Settings::RequestSettings request_settings; + request_settings.max_single_read_retries = max_single_read_retries; DB::ReadBufferFromS3 read_buffer( client, uri.bucket, uri.key, version_id, - rw_settings, + request_settings, read_settings ); diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index f823015bd7d..d3a5b185248 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -71,7 +71,7 @@ WriteBufferFromS3::WriteBufferFromS3( std::shared_ptr client_ptr_, const String & bucket_, const String & key_, - const S3Settings::ReadWriteSettings & s3_settings_, + const S3Settings::RequestSettings & request_settings_, std::optional> object_metadata_, size_t buffer_size_, ThreadPoolCallbackRunner schedule_, @@ -79,10 +79,10 @@ WriteBufferFromS3::WriteBufferFromS3( : BufferWithOwnMemory(buffer_size_, nullptr, 0) , bucket(bucket_) , key(key_) - , s3_settings(s3_settings_) + , request_settings(request_settings_) , client_ptr(std::move(client_ptr_)) , object_metadata(std::move(object_metadata_)) - , upload_part_size(s3_settings_.min_upload_part_size) + , upload_part_size(request_settings_.min_upload_part_size) , schedule(std::move(schedule_)) , write_settings(write_settings_) { @@ -107,7 +107,7 @@ void WriteBufferFromS3::nextImpl() write_settings.remote_throttler->add(offset()); /// Data size exceeds singlepart upload threshold, 
need to use multipart upload. - if (multipart_upload_id.empty() && last_part_size > s3_settings.max_single_part_upload_size) + if (multipart_upload_id.empty() && last_part_size > request_settings.max_single_part_upload_size) createMultipartUpload(); if (!multipart_upload_id.empty() && last_part_size > upload_part_size) @@ -122,8 +122,8 @@ void WriteBufferFromS3::nextImpl() void WriteBufferFromS3::allocateBuffer() { - if (total_parts_uploaded != 0 && total_parts_uploaded % s3_settings.upload_part_size_multiply_parts_count_threshold == 0) - upload_part_size *= s3_settings.upload_part_size_multiply_factor; + if (total_parts_uploaded != 0 && total_parts_uploaded % request_settings.upload_part_size_multiply_parts_count_threshold == 0) + upload_part_size *= request_settings.upload_part_size_multiply_factor; temporary_buffer = Aws::MakeShared("temporary buffer"); temporary_buffer->exceptions(std::ios::badbit); @@ -177,7 +177,7 @@ void WriteBufferFromS3::finalizeImpl() if (!multipart_upload_id.empty()) completeMultipartUpload(); - if (s3_settings.check_objects_after_upload) + if (request_settings.check_objects_after_upload) { LOG_TRACE(log, "Checking object {} exists after upload", key); @@ -367,7 +367,7 @@ void WriteBufferFromS3::completeMultipartUpload() req.SetMultipartUpload(multipart_upload); - size_t max_retry = std::max(s3_settings.max_unexpected_write_error_retries, 1UL); + size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL); for (size_t i = 0; i < max_retry; ++i) { ProfileEvents::increment(ProfileEvents::S3CompleteMultipartUpload); @@ -473,7 +473,7 @@ void WriteBufferFromS3::fillPutRequest(Aws::S3::Model::PutObjectRequest & req) void WriteBufferFromS3::processPutRequest(const PutObjectTask & task) { - size_t max_retry = std::max(s3_settings.max_unexpected_write_error_retries, 1UL); + size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL); for (size_t i = 0; i < max_retry; ++i) { ProfileEvents::increment(ProfileEvents::S3PutObject); diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 28f831856d7..b4b5a6d37a3 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -50,7 +50,7 @@ public: std::shared_ptr client_ptr_, const String & bucket_, const String & key_, - const S3Settings::ReadWriteSettings & s3_settings_, + const S3Settings::RequestSettings & request_settings_, std::optional> object_metadata_ = std::nullopt, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, ThreadPoolCallbackRunner schedule_ = {}, @@ -88,7 +88,7 @@ private: const String bucket; const String key; - const S3Settings::ReadWriteSettings s3_settings; + const S3Settings::RequestSettings request_settings; const std::shared_ptr client_ptr; const std::optional> object_metadata; diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h index 5736336983a..a8c27e3d1d4 100644 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ b/src/Storages/ExternalDataSourceConfiguration.h @@ -118,7 +118,7 @@ struct URLBasedDataSourceConfiguration struct StorageS3Configuration : URLBasedDataSourceConfiguration { S3::AuthSettings auth_settings; - S3Settings::ReadWriteSettings rw_settings; + S3Settings::RequestSettings request_settings; }; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 9309e3f7384..cc13838a4c7 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -101,7 +101,7 @@ public: ContextPtr context_, std::unordered_map * 
object_infos_, Strings * read_keys_, - const S3Settings::ReadWriteSettings & rw_settings_) + const S3Settings::RequestSettings & request_settings_) : WithContext(context_) , client(client_) , globbed_uri(globbed_uri_) @@ -109,7 +109,7 @@ public: , virtual_header(virtual_header_) , object_infos(object_infos_) , read_keys(read_keys_) - , rw_settings(rw_settings_) + , request_settings(request_settings_) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) throw Exception("Expression can not have wildcards inside bucket name", ErrorCodes::UNEXPECTED_EXPRESSION); @@ -260,7 +260,7 @@ private: bool is_finished{false}; std::unordered_map * object_infos; Strings * read_keys; - S3Settings::ReadWriteSettings rw_settings; + S3Settings::RequestSettings request_settings; }; StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( @@ -271,8 +271,8 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( ContextPtr context, std::unordered_map * object_infos_, Strings * read_keys_, - const S3Settings::ReadWriteSettings & rw_settings_) - : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, object_infos_, read_keys_, rw_settings_)) + const S3Settings::RequestSettings & request_settings_) + : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, object_infos_, read_keys_, request_settings_)) { } @@ -385,7 +385,7 @@ StorageS3Source::StorageS3Source( std::optional format_settings_, const ColumnsDescription & columns_, UInt64 max_block_size_, - const S3Settings::ReadWriteSettings & rw_settings_, + const S3Settings::RequestSettings & request_settings_, String compression_hint_, const std::shared_ptr & client_, const String & bucket_, @@ -401,7 +401,7 @@ StorageS3Source::StorageS3Source( , format(format_) , columns_desc(columns_) , max_block_size(max_block_size_) - , rw_settings(rw_settings_) + , request_settings(request_settings_) , compression_hint(std::move(compression_hint_)) , client(client_) , sample_block(sample_block_) @@ -467,7 +467,7 @@ std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & k if (!use_parallel_download || object_too_small) { LOG_TRACE(log, "Downloading object of size {} from S3 in single thread", object_size); - return std::make_unique(client, bucket, key, version_id, rw_settings, getContext()->getReadSettings()); + return std::make_unique(client, bucket, key, version_id, request_settings, getContext()->getReadSettings()); } assert(object_size > 0); @@ -479,7 +479,7 @@ std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & k } auto factory = std::make_unique( - client, bucket, key, version_id, download_buffer_size, object_size, rw_settings, getContext()->getReadSettings()); + client, bucket, key, version_id, download_buffer_size, object_size, request_settings, getContext()->getReadSettings()); LOG_TRACE( log, "Downloading from S3 in {} threads. 
Object size: {}, Range size: {}.", download_thread_num, object_size, download_buffer_size); @@ -589,7 +589,7 @@ public: s3_configuration_.client, bucket, key, - s3_configuration_.rw_settings, + s3_configuration_.request_settings, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, threadPoolCallbackRunner(IOThreadPool::get(), "S3ParallelRead"), @@ -753,7 +753,7 @@ StorageS3::StorageS3( bool distributed_processing_, ASTPtr partition_by_) : IStorage(table_id_) - , s3_configuration{configuration_.url, configuration_.auth_settings, configuration_.rw_settings, configuration_.headers} + , s3_configuration{configuration_.url, configuration_.auth_settings, configuration_.request_settings, configuration_.headers} , keys({s3_configuration.uri.key}) , format_name(configuration_.format) , compression_method(configuration_.compression_method) @@ -819,7 +819,7 @@ std::shared_ptr StorageS3::createFileIterator( { /// Iterate through disclosed globs and make a source for each file auto glob_iterator = std::make_shared( - *s3_configuration.client, s3_configuration.uri, query, virtual_block, local_context, object_infos, read_keys, s3_configuration.rw_settings); + *s3_configuration.client, s3_configuration.uri, query, virtual_block, local_context, object_infos, read_keys, s3_configuration.request_settings); return std::make_shared([glob_iterator]() { return glob_iterator->next(); }); } else @@ -909,7 +909,7 @@ Pipe StorageS3::read( format_settings, columns_description, max_block_size, - s3_configuration.rw_settings, + s3_configuration.request_settings, compression_method, s3_configuration.client, s3_configuration.uri.bucket, @@ -1026,10 +1026,10 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration & upd) { auto settings = ctx->getStorageS3Settings().getSettings(upd.uri.uri.toString()); - if (upd.rw_settings != settings.rw_settings) - upd.rw_settings = settings.rw_settings; + if (upd.request_settings != settings.request_settings) + upd.request_settings = settings.request_settings; - upd.rw_settings.updateFromSettingsIfEmpty(ctx->getSettings()); + upd.request_settings.updateFromSettingsIfEmpty(ctx->getSettings()); if (upd.client) { @@ -1048,11 +1048,11 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration static_cast(ctx->getGlobalContext()->getSettingsRef().s3_max_redirects), ctx->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, /* for_disk_s3 = */ false, - upd.rw_settings.get_request_throttler, - upd.rw_settings.put_request_throttler); + upd.request_settings.get_request_throttler, + upd.request_settings.put_request_throttler); client_configuration.endpointOverride = upd.uri.endpoint; - client_configuration.maxConnections = static_cast(upd.rw_settings.max_connections); + client_configuration.maxConnections = static_cast(upd.request_settings.max_connections); auto credentials = Aws::Auth::AWSCredentials(upd.auth_settings.access_key_id, upd.auth_settings.secret_access_key); auto headers = upd.auth_settings.headers; @@ -1084,17 +1084,17 @@ void StorageS3::processNamedCollectionResult(StorageS3Configuration & configurat else if (arg_name == "use_environment_credentials") configuration.auth_settings.use_environment_credentials = checkAndGetLiteralArgument(arg_value, "use_environment_credentials"); else if (arg_name == "max_single_read_retries") - configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_single_read_retries"); + 
configuration.request_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_single_read_retries"); else if (arg_name == "min_upload_part_size") - configuration.rw_settings.min_upload_part_size = checkAndGetLiteralArgument(arg_value, "min_upload_part_size"); + configuration.request_settings.min_upload_part_size = checkAndGetLiteralArgument(arg_value, "min_upload_part_size"); else if (arg_name == "upload_part_size_multiply_factor") - configuration.rw_settings.upload_part_size_multiply_factor = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_factor"); + configuration.request_settings.upload_part_size_multiply_factor = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_factor"); else if (arg_name == "upload_part_size_multiply_parts_count_threshold") - configuration.rw_settings.upload_part_size_multiply_parts_count_threshold = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_parts_count_threshold"); + configuration.request_settings.upload_part_size_multiply_parts_count_threshold = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_parts_count_threshold"); else if (arg_name == "max_single_part_upload_size") - configuration.rw_settings.max_single_part_upload_size = checkAndGetLiteralArgument(arg_value, "max_single_part_upload_size"); + configuration.request_settings.max_single_part_upload_size = checkAndGetLiteralArgument(arg_value, "max_single_part_upload_size"); else if (arg_name == "max_connections") - configuration.rw_settings.max_connections = checkAndGetLiteralArgument(arg_value, "max_connections"); + configuration.request_settings.max_connections = checkAndGetLiteralArgument(arg_value, "max_connections"); else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Unknown key-value argument `{}` for StorageS3, expected: url, [access_key_id, secret_access_key], name of used format and [compression_method].", @@ -1162,7 +1162,7 @@ ColumnsDescription StorageS3::getTableStructureFromData( S3Configuration s3_configuration{ configuration.url, configuration.auth_settings, - S3Settings::ReadWriteSettings(ctx->getSettingsRef()), + S3Settings::RequestSettings(ctx->getSettingsRef()), configuration.headers}; updateS3Configuration(ctx, s3_configuration); @@ -1224,7 +1224,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( int zstd_window_log_max = static_cast(ctx->getSettingsRef().zstd_window_log_max); return wrapReadBufferWithCompressionMethod( std::make_unique( - s3_configuration.client, s3_configuration.uri.bucket, key, s3_configuration.uri.version_id, s3_configuration.rw_settings, ctx->getReadSettings()), + s3_configuration.client, s3_configuration.uri.bucket, key, s3_configuration.uri.version_id, s3_configuration.request_settings, ctx->getReadSettings()), chooseCompressionMethod(key, compression_method), zstd_window_log_max); }; diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 76391cb2695..8c15ede2b0b 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -44,7 +44,7 @@ public: ContextPtr context, std::unordered_map * object_infos = nullptr, Strings * read_keys_ = nullptr, - const S3Settings::ReadWriteSettings & rw_settings_ = {}); + const S3Settings::RequestSettings & request_settings_ = {}); String next(); @@ -80,7 +80,7 @@ public: std::optional format_settings_, const ColumnsDescription & columns_, UInt64 max_block_size_, - const S3Settings::ReadWriteSettings & rw_settings_, + const S3Settings::RequestSettings & request_settings_, String 
compression_hint_, const std::shared_ptr & client_, const String & bucket, @@ -103,7 +103,7 @@ private: String format; ColumnsDescription columns_desc; UInt64 max_block_size; - S3Settings::ReadWriteSettings rw_settings; + S3Settings::RequestSettings request_settings; String compression_hint; std::shared_ptr client; Block sample_block; @@ -187,7 +187,7 @@ public: std::shared_ptr client; S3::AuthSettings auth_settings; - S3Settings::ReadWriteSettings rw_settings; + S3Settings::RequestSettings request_settings; /// If s3 configuration was passed from ast, then it is static. /// If from config - it can be changed with config reload. @@ -199,11 +199,11 @@ public: S3Configuration( const String & url_, const S3::AuthSettings & auth_settings_, - const S3Settings::ReadWriteSettings & rw_settings_, + const S3Settings::RequestSettings & request_settings_, const HeaderCollection & headers_from_ast_) : uri(S3::URI(url_)) , auth_settings(auth_settings_) - , rw_settings(rw_settings_) + , request_settings(request_settings_) , static_configuration(!auth_settings_.access_key_id.empty()) , headers_from_ast(headers_from_ast_) {} }; diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 3b8c8b1cb92..920116527ee 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -46,7 +46,7 @@ StorageS3Cluster::StorageS3Cluster( const ConstraintsDescription & constraints_, ContextPtr context_) : IStorage(table_id_) - , s3_configuration{configuration_.url, configuration_.auth_settings, configuration_.rw_settings, configuration_.headers} + , s3_configuration{configuration_.url, configuration_.auth_settings, configuration_.request_settings, configuration_.headers} , filename(configuration_.url) , cluster_name(configuration_.cluster_name) , format_name(configuration_.format) diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 9e670c65e17..e24b892dbac 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -48,24 +48,24 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U auto auth_settings = S3::AuthSettings::loadFromConfig(config_elem + "." 
+ key, config); - S3Settings::ReadWriteSettings rw_settings; - rw_settings.max_single_read_retries = get_uint_for_key(key, "max_single_read_retries", true, settings.s3_max_single_read_retries); - rw_settings.min_upload_part_size = get_uint_for_key(key, "min_upload_part_size", true, settings.s3_min_upload_part_size); - rw_settings.upload_part_size_multiply_factor = get_uint_for_key(key, "upload_part_size_multiply_factor", true, settings.s3_upload_part_size_multiply_factor); - rw_settings.upload_part_size_multiply_parts_count_threshold = get_uint_for_key(key, "upload_part_size_multiply_parts_count_threshold", true, settings.s3_upload_part_size_multiply_parts_count_threshold); - rw_settings.max_single_part_upload_size = get_uint_for_key(key, "max_single_part_upload_size", true, settings.s3_max_single_part_upload_size); - rw_settings.max_connections = get_uint_for_key(key, "max_connections", true, settings.s3_max_connections); - rw_settings.check_objects_after_upload = get_bool_for_key(key, "check_objects_after_upload", true, false); + S3Settings::RequestSettings request_settings; + request_settings.max_single_read_retries = get_uint_for_key(key, "max_single_read_retries", true, settings.s3_max_single_read_retries); + request_settings.min_upload_part_size = get_uint_for_key(key, "min_upload_part_size", true, settings.s3_min_upload_part_size); + request_settings.upload_part_size_multiply_factor = get_uint_for_key(key, "upload_part_size_multiply_factor", true, settings.s3_upload_part_size_multiply_factor); + request_settings.upload_part_size_multiply_parts_count_threshold = get_uint_for_key(key, "upload_part_size_multiply_parts_count_threshold", true, settings.s3_upload_part_size_multiply_parts_count_threshold); + request_settings.max_single_part_upload_size = get_uint_for_key(key, "max_single_part_upload_size", true, settings.s3_max_single_part_upload_size); + request_settings.max_connections = get_uint_for_key(key, "max_connections", true, settings.s3_max_connections); + request_settings.check_objects_after_upload = get_bool_for_key(key, "check_objects_after_upload", true, false); // NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. if (UInt64 max_get_rps = get_uint_for_key(key, "max_get_rps", true, settings.s3_max_get_rps)) - rw_settings.get_request_throttler = std::make_shared( + request_settings.get_request_throttler = std::make_shared( max_get_rps, get_uint_for_key(key, "max_get_burst", true, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * max_get_rps)); if (UInt64 max_put_rps = get_uint_for_key(key, "max_put_rps", true, settings.s3_max_put_rps)) - rw_settings.put_request_throttler = std::make_shared( + request_settings.put_request_throttler = std::make_shared( max_put_rps, get_uint_for_key(key, "max_put_burst", true, settings.s3_max_put_burst ? 
settings.s3_max_put_burst : Throttler::default_burst_seconds * max_put_rps)); - s3_settings.emplace(endpoint, S3Settings{std::move(auth_settings), std::move(rw_settings)}); + s3_settings.emplace(endpoint, S3Settings{std::move(auth_settings), std::move(request_settings)}); } } } @@ -86,7 +86,7 @@ S3Settings StorageS3Settings::getSettings(const String & endpoint) const return {}; } -S3Settings::ReadWriteSettings::ReadWriteSettings(const Settings & settings) +S3Settings::RequestSettings::RequestSettings(const Settings & settings) { max_single_read_retries = settings.s3_max_single_read_retries; min_upload_part_size = settings.s3_min_upload_part_size; @@ -104,7 +104,7 @@ S3Settings::ReadWriteSettings::ReadWriteSettings(const Settings & settings) settings.s3_max_put_rps, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps); } -void S3Settings::ReadWriteSettings::updateFromSettingsIfEmpty(const Settings & settings) +void S3Settings::RequestSettings::updateFromSettingsIfEmpty(const Settings & settings) { if (!max_single_read_retries) max_single_read_retries = settings.s3_max_single_read_retries; diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 40ba11a19ba..509a690ef8c 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -24,7 +24,7 @@ struct Settings; struct S3Settings { - struct ReadWriteSettings + struct RequestSettings { size_t max_single_read_retries = 0; size_t min_upload_part_size = 0; @@ -37,10 +37,10 @@ struct S3Settings ThrottlerPtr get_request_throttler; ThrottlerPtr put_request_throttler; - ReadWriteSettings() = default; - explicit ReadWriteSettings(const Settings & settings); + RequestSettings() = default; + explicit RequestSettings(const Settings & settings); - inline bool operator==(const ReadWriteSettings & other) const + inline bool operator==(const RequestSettings & other) const { return max_single_read_retries == other.max_single_read_retries && min_upload_part_size == other.min_upload_part_size @@ -58,11 +58,11 @@ struct S3Settings }; S3::AuthSettings auth_settings; - ReadWriteSettings rw_settings; + RequestSettings request_settings; inline bool operator==(const S3Settings & other) const { - return auth_settings == other.auth_settings && rw_settings == other.rw_settings; + return auth_settings == other.auth_settings && request_settings == other.request_settings; } }; From 2de26daa5630e38076f96ba600cb1594edab32d7 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 8 Nov 2022 14:31:29 +0000 Subject: [PATCH 13/80] fix build --- src/Backups/BackupIO_S3.cpp | 8 ++++---- src/Storages/StorageDelta.cpp | 6 ++++-- src/Storages/StorageHudi.cpp | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index e953b441c1b..5a4e804a778 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -247,10 +247,10 @@ void BackupWriterS3::copyObjectMultipartImpl( position = next_position; - if (part_number % rw_settings.upload_part_size_multiply_parts_count_threshold == 0) + if (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0) { - upload_part_size *= rw_settings.upload_part_size_multiply_factor; - upload_part_size = std::min(upload_part_size, rw_settings.max_upload_part_size); + upload_part_size *= request_settings.upload_part_size_multiply_factor; + upload_part_size = std::min(upload_part_size, request_settings.max_upload_part_size); } } @@ -293,7 
+293,7 @@ void BackupWriterS3::copyFileNative(DiskPtr from_disk, const String & file_name_ auto file_path = fs::path(s3_uri.key) / file_name_to; auto head = requestObjectHeadData(source_bucket, objects[0].absolute_path).GetResult(); - if (static_cast(head.GetContentLength()) < rw_settings.max_single_operation_copy_size) + if (static_cast(head.GetContentLength()) < request_settings.max_single_operation_copy_size) { copyObjectImpl( source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, head); diff --git a/src/Storages/StorageDelta.cpp b/src/Storages/StorageDelta.cpp index e8287a2fd61..c077b24c610 100644 --- a/src/Storages/StorageDelta.cpp +++ b/src/Storages/StorageDelta.cpp @@ -151,12 +151,14 @@ std::vector JsonMetadataGetter::getJsonLogFiles() std::shared_ptr JsonMetadataGetter::createS3ReadBuffer(const String & key, ContextPtr context) { /// TODO: add parallel downloads + S3Settings::RequestSettings request_settings; + request_settings.max_single_read_retries = 10; return std::make_shared( base_configuration.client, base_configuration.uri.bucket, key, base_configuration.uri.version_id, - /* max single read retries */10, + request_settings, context->getReadSettings()); } @@ -187,7 +189,7 @@ StorageDelta::StorageDelta( ContextPtr context_, std::optional format_settings_) : IStorage(table_id_) - , base_configuration{configuration_.url, configuration_.auth_settings, configuration_.rw_settings, configuration_.headers} + , base_configuration{configuration_.url, configuration_.auth_settings, configuration_.request_settings, configuration_.headers} , log(&Poco::Logger::get("StorageDeltaLake (" + table_id_.table_name + ")")) , table_path(base_configuration.uri.key) { diff --git a/src/Storages/StorageHudi.cpp b/src/Storages/StorageHudi.cpp index 121856c4a57..4b20e4cbd22 100644 --- a/src/Storages/StorageHudi.cpp +++ b/src/Storages/StorageHudi.cpp @@ -37,7 +37,7 @@ StorageHudi::StorageHudi( ContextPtr context_, std::optional format_settings_) : IStorage(table_id_) - , base_configuration{configuration_.url, configuration_.auth_settings, configuration_.rw_settings, configuration_.headers} + , base_configuration{configuration_.url, configuration_.auth_settings, configuration_.request_settings, configuration_.headers} , log(&Poco::Logger::get("StorageHudi (" + table_id_.table_name + ")")) , table_path(base_configuration.uri.key) { From 2720cddeecfe0fb9517e29f64304df8d674f0886 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 9 Nov 2022 04:28:02 +0000 Subject: [PATCH 14/80] Add support for interactive parameters in INSERT VALUES queries --- src/Client/ClientBase.cpp | 8 ++++++++ .../0_stateless/02476_query_parameters_insert.reference | 1 + .../queries/0_stateless/02476_query_parameters_insert.sql | 8 ++++++++ 3 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/02476_query_parameters_insert.reference create mode 100644 tests/queries/0_stateless/02476_query_parameters_insert.sql diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 5d7de8ec799..87c92b91d31 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1542,6 +1542,14 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin global_context->applySettingChange(change); } global_context->resetSettingsToDefaultValue(set_query->default_settings); + + /// Query parameters inside SET queries should be also saved on the client side + /// to override their previous definitions set with --param_* arguments + /// and for substitutions to work 
inside INSERT ... VALUES queries + for (const auto & [name, value] : set_query->query_parameters) + query_parameters.insert_or_assign(name, value); + + global_context->addQueryParameters(set_query->query_parameters); } if (const auto * use_query = parsed_query->as()) { diff --git a/tests/queries/0_stateless/02476_query_parameters_insert.reference b/tests/queries/0_stateless/02476_query_parameters_insert.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02476_query_parameters_insert.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02476_query_parameters_insert.sql b/tests/queries/0_stateless/02476_query_parameters_insert.sql new file mode 100644 index 00000000000..de866ccbc4d --- /dev/null +++ b/tests/queries/0_stateless/02476_query_parameters_insert.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS 02476_query_parameters_insert; +CREATE TABLE 02476_query_parameters_insert (x Int32) ENGINE=MergeTree() ORDER BY tuple(); + +SET param_x = 1; +INSERT INTO 02476_query_parameters_insert VALUES ({x: Int32}); +SELECT * FROM 02476_query_parameters_insert; + +DROP TABLE 02476_query_parameters_insert; From 2ad8c5829fa1381214858cf20de98fcc33d96fac Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 9 Nov 2022 14:18:56 +0000 Subject: [PATCH 15/80] add test --- .../02477_s3_request_throttler.reference | 1 + .../0_stateless/02477_s3_request_throttler.sql | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 tests/queries/0_stateless/02477_s3_request_throttler.reference create mode 100644 tests/queries/0_stateless/02477_s3_request_throttler.sql diff --git a/tests/queries/0_stateless/02477_s3_request_throttler.reference b/tests/queries/0_stateless/02477_s3_request_throttler.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02477_s3_request_throttler.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02477_s3_request_throttler.sql b/tests/queries/0_stateless/02477_s3_request_throttler.sql new file mode 100644 index 00000000000..7311c8ac9bc --- /dev/null +++ b/tests/queries/0_stateless/02477_s3_request_throttler.sql @@ -0,0 +1,15 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: needs s3 + +-- Limit S3 PUT request per second rate +SET s3_max_put_rps = 2; +SET s3_max_put_burst = 1; + +CREATE TEMPORARY TABLE times (t DateTime); + +-- INSERT query requires 3 PUT requests (CreateMultipartUpload, UploadPart, CompleteMultipartUpload) and 1/rps = 0.5 second in between, the first query is not throttled due to burst +INSERT INTO times SELECT now(); +INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/request-throttler.csv', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10000, s3_truncate_on_insert = 1; +INSERT INTO times SELECT now(); + +SELECT max(t) - min(t) >= 1 FROM times; From 7448d1bb5d7c89a769c8906a0dd149a1e493c8ff Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 9 Nov 2022 14:58:41 +0000 Subject: [PATCH 16/80] add docs --- docs/en/engines/table-engines/integrations/s3.md | 5 +++++ docs/en/engines/table-engines/mergetree-family/mergetree.md | 4 ++++ src/Core/Settings.h | 4 ++-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index db983ab9c68..484fd265c3d 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ 
b/docs/en/engines/table-engines/integrations/s3.md @@ -127,6 +127,10 @@ The following settings can be set before query execution or placed into configur - `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`. - `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`. - `s3_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. +- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. +- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. @@ -142,6 +146,7 @@ The following settings can be specified in configuration file for given endpoint - `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional. - `max_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. Optional. +- `max_put_rps`, `max_put_burst`, `max_get_rps` and `max_get_burst` - Throttling settings (see description above) to use for specific endpoint instead of per query. Optional. **Example:** diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 486baac2310..4fe286a2098 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -940,6 +940,10 @@ Optional parameters: - `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks//cache/`. - `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. +- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. +- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. 
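For reference, a minimal usage sketch of the per-query throttling settings documented above, modelled on the stateless test added later in this series; the endpoint, credentials, object name and rate values below are placeholders chosen only for illustration.

``` sql
-- Limit S3 PUT requests issued by the next INSERT to 2 per second (burst of 1).
SET s3_max_put_rps = 2;
SET s3_max_put_burst = 1;

INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/throttled.csv', 'test', 'testtest', 'CSV', 'number UInt64')
SELECT number FROM numbers(1000000)
SETTINGS s3_truncate_on_insert = 1;

-- GET requests are throttled the same way when the data is read back.
SET s3_max_get_rps = 4;
SELECT count() FROM s3('http://localhost:11111/test/throttled.csv', 'test', 'testtest', 'CSV', 'number UInt64');
```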
S3 disk can be configured as `main` or `cold` storage: ``` xml diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e36ff1985fb..2f8aca6c628 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -90,9 +90,9 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ - M(UInt64, s3_max_get_rps, 0, "Limit on S3 GET request per second rate. Zero means unlimited.", 0) \ + M(UInt64, s3_max_get_rps, 0, "Limit on S3 GET request per second rate before throttling. Zero means unlimited.", 0) \ M(UInt64, s3_max_get_burst, 0, "Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_get_rps`", 0) \ - M(UInt64, s3_max_put_rps, 0, "Limit on S3 PUT request per second rate. Zero means unlimited.", 0) \ + M(UInt64, s3_max_put_rps, 0, "Limit on S3 PUT request per second rate before throttling. Zero means unlimited.", 0) \ M(UInt64, s3_max_put_burst, 0, "Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_put_rps`", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ From 839de0cd092420356846c6a53468e603f4200594 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 10 Nov 2022 16:03:11 +0800 Subject: [PATCH 17/80] add function factorial --- .../sql-reference/functions/math-functions.md | 30 +++++++++++++++++++ .../00087_math_functions.reference | 3 ++ .../0_stateless/00087_math_functions.sql | 7 +++++ 3 files changed, 40 insertions(+) diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 430762a1885..bfd2f63dc5b 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -549,3 +549,33 @@ Result: │ 3.141592653589793 │ └───────────────────┘ ``` + + +## factorial(n) + +Computes the factorial of an integer value. It works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is Int64. + +The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20; a value of 21 or greater overflows the range for Int64 and will causes an throw exception. 
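A quick sketch of the boundary behaviour described above; the expected results follow from the Int64 lookup table in factorial.cpp added further down in this series, and the error case mirrors the accompanying stateless tests.

``` sql
SELECT factorial(20);  -- 2432902008176640000, the largest factorial that fits in Int64
SELECT factorial(-5);  -- 1, any negative argument yields 1
SELECT factorial(21);  -- throws an exception: the input argument may not exceed 20
```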
+ + +**Syntax** + +``` sql +factorial(n) +``` + +**Example** + +Query: + +``` sql +SELECT factorial(10); +``` + +Result: + +``` text +┌─factorial(10)─┐ +│ 3628800 │ +└───────────────┘ +``` diff --git a/tests/queries/0_stateless/00087_math_functions.reference b/tests/queries/0_stateless/00087_math_functions.reference index e02aac11faf..48cb24098ff 100644 --- a/tests/queries/0_stateless/00087_math_functions.reference +++ b/tests/queries/0_stateless/00087_math_functions.reference @@ -112,3 +112,6 @@ 1 1 1 +1 +1 +1 diff --git a/tests/queries/0_stateless/00087_math_functions.sql b/tests/queries/0_stateless/00087_math_functions.sql index e40acfb3481..11456944be3 100644 --- a/tests/queries/0_stateless/00087_math_functions.sql +++ b/tests/queries/0_stateless/00087_math_functions.sql @@ -137,3 +137,10 @@ select erf(10) = 1; select erfc(0) = 1; select erfc(-10) = 2; select erfc(28) = 0; + +select factorial(-1) = 1; +select factorial(0) = 1; +select factorial(10) = 3628800; +select factorial(100); -- { serverError 36 } +select factorial('100'); -- { serverError 43 } +select factorial(100.1234); -- { serverError 43 } From d76c271242b9f4a5ce025d6096857bb2d84adeff Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 10 Nov 2022 16:08:22 +0800 Subject: [PATCH 18/80] add missed file --- src/Functions/factorial.cpp | 103 ++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 src/Functions/factorial.cpp diff --git a/src/Functions/factorial.cpp b/src/Functions/factorial.cpp new file mode 100644 index 00000000000..d92cb668214 --- /dev/null +++ b/src/Functions/factorial.cpp @@ -0,0 +1,103 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +template +struct FactorialImpl +{ + using ResultType = Int64; + static const constexpr bool allow_decimal = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_string_integer = false; + + static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + { + if constexpr (std::is_floating_point_v || is_over_big_int) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type of argument of function factorial, should not be floating point or big int"); + + if constexpr (is_integer) + { + if (a > 20) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The maximum value for the input argument of function factorial is 20"); + + if constexpr (is_unsigned_v) + return factorials[a]; + else if constexpr (is_signed_v) + return a >= 0 ? 
factorials[a] : 1; + } + } + +#if USE_EMBEDDED_COMPILER + static constexpr bool compilable = false; /// special type handling, some other time +#endif + +private: + static const constexpr ResultType factorials[21] + = {1, + 1, + 2, + 6, + 24, + 120, + 720, + 5040, + 40320, + 362880, + 3628800, + 39916800, + 479001600, + 6227020800L, + 87178291200L, + 1307674368000L, + 20922789888000L, + 355687428096000L, + 6402373705728000L, + 121645100408832000L, + 2432902008176640000L}; +}; + +struct NameFactorial { static constexpr auto name = "factorial"; }; +using FunctionFactorial = FunctionUnaryArithmetic; + +template <> struct FunctionUnaryArithmeticMonotonicity +{ + static bool has() { return true; } + + static IFunction::Monotonicity get(const Field & /*left*/, const Field & /*right*/) + { + return { + .is_monotonic = true, + .is_positive = true, + .is_always_monotonic = true, + .is_strict = false, + }; + } +}; + + +REGISTER_FUNCTION(Factorial) +{ + factory.registerFunction( + { + R"( +Computes the factorial of an integer value. It works with any native integer type. The return type is Int64. + +The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20; a value of 21 or greater overflows the range for Int64 and will causes an throw exception. +)", + Documentation::Examples{{"factorial", "SELECT factorial(10)"}}, + Documentation::Categories{"Mathematical"}}, + FunctionFactory::CaseInsensitive); +} + +} From a1fd2752094f9ccdd01f134e46b0cc0ff68b2c23 Mon Sep 17 00:00:00 2001 From: Alfonso Martinez Date: Thu, 10 Nov 2022 09:13:52 +0100 Subject: [PATCH 19/80] Fixed duplicated includes --- src/Functions/FunctionsJSON.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index a89a2b24b9e..b4b15a25047 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -21,11 +21,6 @@ #include #include -#include -#include -#include -#include -#include #include #include #include From cbdb2b0be471cba9722bc22efcca04bcac922fd4 Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 10 Nov 2022 12:19:06 +0000 Subject: [PATCH 20/80] add more asserts in test --- .../02477_s3_request_throttler.reference | 1 + .../0_stateless/02477_s3_request_throttler.sql | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02477_s3_request_throttler.reference b/tests/queries/0_stateless/02477_s3_request_throttler.reference index d00491fd7e5..9315e86b328 100644 --- a/tests/queries/0_stateless/02477_s3_request_throttler.reference +++ b/tests/queries/0_stateless/02477_s3_request_throttler.reference @@ -1 +1,2 @@ 1 +1 1 1 diff --git a/tests/queries/0_stateless/02477_s3_request_throttler.sql b/tests/queries/0_stateless/02477_s3_request_throttler.sql index 7311c8ac9bc..25653b1bab0 100644 --- a/tests/queries/0_stateless/02477_s3_request_throttler.sql +++ b/tests/queries/0_stateless/02477_s3_request_throttler.sql @@ -7,9 +7,20 @@ SET s3_max_put_burst = 1; CREATE TEMPORARY TABLE times (t DateTime); --- INSERT query requires 3 PUT requests (CreateMultipartUpload, UploadPart, CompleteMultipartUpload) and 1/rps = 0.5 second in between, the first query is not throttled due to burst +-- INSERT query requires 3 PUT requests and 1/rps = 0.5 second in between, the first query is not throttled due to burst INSERT INTO times SELECT now(); INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/request-throttler.csv', 
'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10000, s3_truncate_on_insert = 1; INSERT INTO times SELECT now(); SELECT max(t) - min(t) >= 1 FROM times; + +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['S3CreateMultipartUpload'] == 1, + ProfileEvents['S3UploadPart'] == 1, + ProfileEvents['S3CompleteMultipartUpload'] == 1 +FROM system.query_log +WHERE query LIKE '%request-throttler.csv%' +AND type = 'QueryFinish' +AND current_database = currentDatabase() +ORDER BY query_start_time DESC +LIMIT 1; From f58e960a7dbe71b54d69ec06d9d54967d0d97ea8 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 10 Nov 2022 21:07:44 +0800 Subject: [PATCH 21/80] update as request --- .../sql-reference/functions/math-functions.md | 2 +- src/Functions/factorial.cpp | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index bfd2f63dc5b..7f349686d8b 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -555,7 +555,7 @@ Result: Computes the factorial of an integer value. It works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is Int64. -The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20; a value of 21 or greater overflows the range for Int64 and will causes an throw exception. +The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20, a value of 21 or greater overflows the range for Int64 and will cause exception throw. **Syntax** diff --git a/src/Functions/factorial.cpp b/src/Functions/factorial.cpp index d92cb668214..5c46b97c193 100644 --- a/src/Functions/factorial.cpp +++ b/src/Functions/factorial.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -74,13 +75,22 @@ template <> struct FunctionUnaryArithmeticMonotonicity { static bool has() { return true; } - static IFunction::Monotonicity get(const Field & /*left*/, const Field & /*right*/) + static IFunction::Monotonicity get(const Field & left, const Field & right) { + bool is_strict = false; + if (!left.isNull() && !right.isNull()) + { + auto left_value = applyVisitor(FieldVisitorConvertToNumber(), left); + auto right_value = applyVisitor(FieldVisitorConvertToNumber(), left); + if (1 <= left_value && left_value <= right_value && right_value <= 20) + is_strict = true; + } + return { .is_monotonic = true, .is_positive = true, .is_always_monotonic = true, - .is_strict = false, + .is_strict = is_strict, }; } }; @@ -91,9 +101,9 @@ REGISTER_FUNCTION(Factorial) factory.registerFunction( { R"( -Computes the factorial of an integer value. It works with any native integer type. The return type is Int64. +Computes the factorial of an integer value. It works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is Int64. -The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20; a value of 21 or greater overflows the range for Int64 and will causes an throw exception. +The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. 
The maximum positive value for the input argument is 20, a value of 21 or greater overflows the range for Int64 and will cause exception throw. )", Documentation::Examples{{"factorial", "SELECT factorial(10)"}}, Documentation::Categories{"Mathematical"}}, From 8a7c7bfb3df3084d01c621e34338b7363d029cc8 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 10 Nov 2022 21:12:55 +0800 Subject: [PATCH 22/80] extract test of function factorial --- tests/queries/0_stateless/00087_math_functions.reference | 3 --- tests/queries/0_stateless/00087_math_functions.sql | 7 ------- tests/queries/0_stateless/02478_factorial.reference | 3 +++ tests/queries/0_stateless/02478_factorial.sql | 7 +++++++ 4 files changed, 10 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/02478_factorial.reference create mode 100644 tests/queries/0_stateless/02478_factorial.sql diff --git a/tests/queries/0_stateless/00087_math_functions.reference b/tests/queries/0_stateless/00087_math_functions.reference index 48cb24098ff..e02aac11faf 100644 --- a/tests/queries/0_stateless/00087_math_functions.reference +++ b/tests/queries/0_stateless/00087_math_functions.reference @@ -112,6 +112,3 @@ 1 1 1 -1 -1 -1 diff --git a/tests/queries/0_stateless/00087_math_functions.sql b/tests/queries/0_stateless/00087_math_functions.sql index 11456944be3..e40acfb3481 100644 --- a/tests/queries/0_stateless/00087_math_functions.sql +++ b/tests/queries/0_stateless/00087_math_functions.sql @@ -137,10 +137,3 @@ select erf(10) = 1; select erfc(0) = 1; select erfc(-10) = 2; select erfc(28) = 0; - -select factorial(-1) = 1; -select factorial(0) = 1; -select factorial(10) = 3628800; -select factorial(100); -- { serverError 36 } -select factorial('100'); -- { serverError 43 } -select factorial(100.1234); -- { serverError 43 } diff --git a/tests/queries/0_stateless/02478_factorial.reference b/tests/queries/0_stateless/02478_factorial.reference new file mode 100644 index 00000000000..e8183f05f5d --- /dev/null +++ b/tests/queries/0_stateless/02478_factorial.reference @@ -0,0 +1,3 @@ +1 +1 +1 diff --git a/tests/queries/0_stateless/02478_factorial.sql b/tests/queries/0_stateless/02478_factorial.sql new file mode 100644 index 00000000000..e1a0f7d60e5 --- /dev/null +++ b/tests/queries/0_stateless/02478_factorial.sql @@ -0,0 +1,7 @@ +select factorial(-1) = 1; +select factorial(0) = 1; +select factorial(10) = 3628800; + +select factorial(100); -- { serverError 36 } +select factorial('100'); -- { serverError 43 } +select factorial(100.1234); -- { serverError 43 } From 953457de281d1167f51d91de9c3ca8df32780b30 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 10 Nov 2022 22:41:26 +0100 Subject: [PATCH 23/80] Remove POCO_CLICKHOUSE_PATCH Signed-off-by: Azat Khuzhin --- programs/keeper/Keeper.cpp | 12 ------------ programs/server/Server.cpp | 12 ------------ src/Common/Exception.h | 6 +----- src/Daemon/BaseDaemon.h | 6 +----- src/IO/HTTPCommon.cpp | 11 ----------- 5 files changed, 2 insertions(+), 45 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index e1d03b40b66..a1bf324f482 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -149,19 +149,7 @@ std::string getUserName(uid_t user_id) Poco::Net::SocketAddress Keeper::socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure) const { auto address = makeSocketAddress(host, port, &logger()); -#if !defined(POCO_CLICKHOUSE_PATCH) || POCO_VERSION < 0x01090100 - if 
(secure) - /// Bug in old (<1.9.1) poco, listen() after bind() with reusePort param will fail because have no implementation in SecureServerSocketImpl - /// https://github.com/pocoproject/poco/pull/2257 - socket.bind(address, /* reuseAddress = */ true); - else -#endif -#if POCO_VERSION < 0x01080000 - socket.bind(address, /* reuseAddress = */ true); -#else socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config().getBool("listen_reuse_port", false)); -#endif - socket.listen(/* backlog = */ config().getUInt("listen_backlog", 64)); return address; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b412b579539..0622303a093 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -341,19 +341,7 @@ Poco::Net::SocketAddress Server::socketBindListen( [[maybe_unused]] bool secure) const { auto address = makeSocketAddress(host, port, &logger()); -#if !defined(POCO_CLICKHOUSE_PATCH) || POCO_VERSION < 0x01090100 - if (secure) - /// Bug in old (<1.9.1) poco, listen() after bind() with reusePort param will fail because have no implementation in SecureServerSocketImpl - /// https://github.com/pocoproject/poco/pull/2257 - socket.bind(address, /* reuseAddress = */ true); - else -#endif -#if POCO_VERSION < 0x01080000 - socket.bind(address, /* reuseAddress = */ true); -#else socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config.getBool("listen_reuse_port", false)); -#endif - /// If caller requests any available port from the OS, discover it after binding. if (port == 0) { diff --git a/src/Common/Exception.h b/src/Common/Exception.h index c5259d157b2..e26a9690f67 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -121,11 +121,7 @@ public: } - std::string displayText() const -#if defined(POCO_CLICKHOUSE_PATCH) - override -#endif - ; + std::string displayText() const override; ssize_t getLineNumber() const { return line_number; } void setLineNumber(int line_number_) { line_number = line_number_;} diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index d248ad9cec9..ae64651caed 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -136,11 +136,7 @@ protected: /// fork the main process and watch if it was killed void setupWatchdog(); - void waitForTerminationRequest() -#if defined(POCO_CLICKHOUSE_PATCH) || POCO_VERSION >= 0x02000000 // in old upstream poco not vitrual - override -#endif - ; + void waitForTerminationRequest() override; /// thread safe virtual void onInterruptSignals(int signal_id); diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index f33b2399492..c015d4566d6 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -49,11 +49,7 @@ namespace { void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts) { -#if defined(POCO_CLICKHOUSE_PATCH) || POCO_VERSION >= 0x02000000 session.setTimeout(timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout); -#else - session.setTimeout(std::max({timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout})); -#endif session.setKeepAliveTimeout(timeouts.http_keep_alive_timeout); } @@ -93,12 +89,7 @@ namespace ProfileEvents::increment(ProfileEvents::CreatedHTTPConnections); /// doesn't work properly without patch -#if defined(POCO_CLICKHOUSE_PATCH) session->setKeepAlive(keep_alive); -#else - (void)keep_alive; // Avoid warning: unused parameter -#endif - return session; } @@ -122,12 +113,10 @@ namespace session->setProxyHost(proxy_host); 
session->setProxyPort(proxy_port); -#if defined(POCO_CLICKHOUSE_PATCH) session->setProxyProtocol(proxy_scheme); /// Turn on tunnel mode if proxy scheme is HTTP while endpoint scheme is HTTPS. session->setProxyTunnel(!proxy_https && https); -#endif } return session; } From 5d53ec9da88b0095fe693f441b85186ec792bbf2 Mon Sep 17 00:00:00 2001 From: Alfonso Martinez Date: Fri, 11 Nov 2022 11:07:05 +0100 Subject: [PATCH 24/80] Cleaned and replace some Strings by using chars directly --- src/Functions/FunctionsJSON.cpp | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index b4b15a25047..f8d5f357549 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -893,9 +893,17 @@ struct JSONExtractTree // For the non low cardinality case of FixedString, the padding is done in the FixedString Column implementation. // In order to avoid having to pass the data to a FixedString Column and read it back (which would slow down the execution) // the data is padded here and written directly to the Low Cardinality Column - auto padded_str = str.data() + std::string(fixed_length - std::min(fixed_length, str.length()), '\0'); + if (str.size() == fixed_length) + { + assert_cast(dest).insertData(str.data(), str.size()); + } + else + { + String padded_str(str); + padded_str.resize(fixed_length, '\0'); - assert_cast(dest).insertData(padded_str.data(), padded_str.size()); + assert_cast(dest).insertData(padded_str.data(), padded_str.size()); + } return true; } @@ -1230,8 +1238,7 @@ struct JSONExtractTree auto fixed_length = typeid_cast(dictionary_type.get())->getN(); return std::make_unique(fixed_length); } - auto impl = build(function_name, dictionary_type); - return impl; + return build(function_name, dictionary_type); } case TypeIndex::Decimal256: return std::make_unique>(type); case TypeIndex::Decimal128: return std::make_unique>(type); @@ -1387,12 +1394,9 @@ public: { ColumnString::Chars chars; WriteBufferFromVector buf(chars, AppendModeTag()); - chars.push_back(0); traverse(element, buf); buf.finalize(); - std::string str = reinterpret_cast(chars.data()); - chars.push_back(0); - assert_cast(dest).insertData(str.data(), str.size()); + assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); } else { @@ -1423,7 +1427,6 @@ public: chars.push_back(0); std::string str = reinterpret_cast(chars.data()); - auto padded_str = str + std::string(col_str.getN() - std::min(col_str.getN(), str.length()), '\0'); col_str.insertData(str.data(), str.size()); @@ -1441,10 +1444,11 @@ public: traverse(element, buf); buf.finalize(); chars.push_back(0); - std::string str = reinterpret_cast(chars.data()); - auto padded_str = str + std::string(fixed_length - std::min(fixed_length, str.length()), '\0'); - assert_cast(dest).insertData(padded_str.data(), padded_str.size()); + for (unsigned long i = 0; i < fixed_length - chars.size(); ++i) + chars.push_back(0); + + assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); return true; } From f24991fa56f93e1ff55e37069e0aa58ab9380303 Mon Sep 17 00:00:00 2001 From: Alfonso Martinez Date: Fri, 11 Nov 2022 13:02:30 +0100 Subject: [PATCH 25/80] Remove pushing a last 0 in FixedStrings --- src/Functions/FunctionsJSON.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index f8d5f357549..0973c1f3e39 100644 --- a/src/Functions/FunctionsJSON.cpp +++ 
b/src/Functions/FunctionsJSON.cpp @@ -1424,7 +1424,6 @@ public: if (chars.size() > col_str.getN()) return false; - chars.push_back(0); std::string str = reinterpret_cast(chars.data()); col_str.insertData(str.data(), str.size()); @@ -1443,7 +1442,6 @@ public: WriteBufferFromVector buf(chars, AppendModeTag()); traverse(element, buf); buf.finalize(); - chars.push_back(0); for (unsigned long i = 0; i < fixed_length - chars.size(); ++i) chars.push_back(0); From ad377b357f0dbd3a9b9671d5828b6d6e34963057 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 11 Nov 2022 13:24:43 +0000 Subject: [PATCH 26/80] fix backup tests --- src/Backups/BackupIO_S3.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 5a4e804a778..8342749e230 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -89,6 +89,7 @@ BackupReaderS3::BackupReaderS3( , read_settings(context_->getReadSettings()) , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) { + request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint } DataSourceDescription BackupReaderS3::getDataSourceDescription() const @@ -128,6 +129,7 @@ BackupWriterS3::BackupWriterS3( , log(&Poco::Logger::get("BackupWriterS3")) { request_settings.updateFromSettingsIfEmpty(context_->getSettingsRef()); + request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint } DataSourceDescription BackupWriterS3::getDataSourceDescription() const From b12ebab65c5e1b084a61900b61094ab0b515a886 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 11 Nov 2022 18:07:15 +0000 Subject: [PATCH 27/80] Enable keeper fault injection and retries for insert queries in funcntional tests --- docker/test/stress/run.sh | 3 +++ tests/config/users.d/insert_keeper_retries.xml | 8 ++++++++ 2 files changed, 11 insertions(+) create mode 100644 tests/config/users.d/insert_keeper_retries.xml diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 78f627bf45e..36b1cd1ef0d 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -388,6 +388,9 @@ else rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||: rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||: + # it uses recently introduced settings which previous versions may not have + rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||: + start clickhouse-client --query="SELECT 'Server version: ', version()" diff --git a/tests/config/users.d/insert_keeper_retries.xml b/tests/config/users.d/insert_keeper_retries.xml new file mode 100644 index 00000000000..462c9df5248 --- /dev/null +++ b/tests/config/users.d/insert_keeper_retries.xml @@ -0,0 +1,8 @@ + + + + 20 + 0.01 + + + From bfea3deed98a85f2d94f82fbe2fa1f5e2f11bd58 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 06:09:03 +0100 Subject: [PATCH 28/80] Miscellaneous changes --- src/QueryPipeline/RemoteQueryExecutor.h | 2 +- src/Server/TCPHandler.h | 2 -- .../ParallelReplicasReadingCoordinator.h | 1 + src/Storages/MergeTree/RequestResponse.cpp | 19 ++++++++++++++----- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index 78bc9f611ab..e2ebdd3cbec 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h 
@@ -45,7 +45,7 @@ public: /// decide whether to deny or to accept that request. struct Extension { - std::shared_ptr task_iterator{nullptr}; + std::shared_ptr task_iterator; std::shared_ptr parallel_reading_coordinator; std::optional replica_info; }; diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 9c8d3ca60f3..0b296aaef4e 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -19,8 +19,6 @@ #include #include -#include - #include "IServer.h" #include "Server/TCPProtocolStackData.h" #include "base/types.h" diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h index bd2082be6c2..4800533e919 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h @@ -3,6 +3,7 @@ #include #include + namespace DB { diff --git a/src/Storages/MergeTree/RequestResponse.cpp b/src/Storages/MergeTree/RequestResponse.cpp index a266540b99a..f4e09190596 100644 --- a/src/Storages/MergeTree/RequestResponse.cpp +++ b/src/Storages/MergeTree/RequestResponse.cpp @@ -7,21 +7,23 @@ #include + namespace DB { namespace ErrorCodes { extern const int UNKNOWN_PROTOCOL; + extern const int BAD_ARGUMENTS; } -static void readMarkRangesBinary(MarkRanges & ranges, ReadBuffer & buf, size_t MAX_RANGES_SIZE = DEFAULT_MAX_STRING_SIZE) +static void readMarkRangesBinary(MarkRanges & ranges, ReadBuffer & buf) { size_t size = 0; readVarUInt(size, buf); - if (size > MAX_RANGES_SIZE) - throw Poco::Exception("Too large ranges size."); + if (size > DEFAULT_MAX_STRING_SIZE) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Too large ranges size: {}.", size); ranges.resize(size); for (size_t i = 0; i < size; ++i) @@ -95,14 +97,21 @@ void PartitionReadRequest::deserialize(ReadBuffer & in) UInt64 PartitionReadRequest::getConsistentHash(size_t buckets) const { - auto hash = SipHash(); + SipHash hash; + + hash.update(partition_id.size()); hash.update(partition_id); + + hash.update(part_name.size()); hash.update(part_name); + + hash.update(projection_name.size()); hash.update(projection_name); hash.update(block_range.begin); hash.update(block_range.end); + hash.update(mark_ranges.size()); for (const auto & range : mark_ranges) { hash.update(range.begin); @@ -118,7 +127,7 @@ void PartitionReadResponse::serialize(WriteBuffer & out) const /// Must be the first writeVarUInt(DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION, out); - writeVarUInt(static_cast(denied), out); + writeBinary(denied, out); writeMarkRangesBinary(mark_ranges, out); } From 9a0a21db94de9626b6c4d04ebf0b3e8303c1bb2e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 06:22:05 +0100 Subject: [PATCH 29/80] Miscellaneous changes --- src/QueryPipeline/RemoteQueryExecutor.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index e2ebdd3cbec..8b8f21a3ae4 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -45,9 +45,9 @@ public: /// decide whether to deny or to accept that request. struct Extension { - std::shared_ptr task_iterator; - std::shared_ptr parallel_reading_coordinator; - std::optional replica_info; + std::shared_ptr task_iterator; + std::shared_ptr parallel_reading_coordinator; + std::optional replica_info; }; /// Takes already set connection. 
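
The getConsistentHash() change above (in RequestResponse.cpp) starts mixing each string field's length into the SipHash state before the field's bytes. The reasoning: a streaming hash over back-to-back variable-length fields cannot tell where one field ends and the next begins, so different (partition_id, part_name, projection_name) triples could feed identical byte streams and land in the same bucket. The sketch below is illustrative only; a toy streaming hash (64-bit FNV-1a) stands in for SipHash, and the decimal encoding of lengths is a simplification, not the real SipHash::update(UInt64) behaviour.

``` cpp
#include <cstdint>
#include <iostream>
#include <string>

/// Toy streaming hash (64-bit FNV-1a), a stand-in for SipHash in this sketch only.
struct StreamingHash
{
    std::uint64_t state = 14695981039346656037ULL;

    void update(const std::string & bytes)
    {
        for (unsigned char c : bytes)
        {
            state ^= c;
            state *= 1099511628211ULL;
        }
    }

    /// Feed an integer (e.g. a field length) as its decimal bytes; simplification for the sketch.
    void update(std::uint64_t n) { update(std::to_string(n)); }
};

int main()
{
    /// Without length prefixes, ("ab", "c") and ("a", "bc") produce the same byte stream.
    StreamingHash h1, h2;
    h1.update(std::string("ab")); h1.update(std::string("c"));
    h2.update(std::string("a"));  h2.update(std::string("bc"));
    std::cout << (h1.state == h2.state) << '\n';   /// prints 1: collision

    /// Prefixing each field with its length, as the patched getConsistentHash() does, keeps them apart.
    StreamingHash h3, h4;
    h3.update(std::uint64_t(2)); h3.update(std::string("ab"));
    h3.update(std::uint64_t(1)); h3.update(std::string("c"));
    h4.update(std::uint64_t(1)); h4.update(std::string("a"));
    h4.update(std::uint64_t(2)); h4.update(std::string("bc"));
    std::cout << (h3.state == h4.state) << '\n';   /// prints 0: distinct
    return 0;
}
```

The same reasoning applies to hashing mark_ranges.size() before the individual ranges: the count acts as a delimiter for the variable-length list.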
From e080161ba5dc560ab2a8b2ea52982c50192e80c2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 06:32:18 +0100 Subject: [PATCH 30/80] Miscellaneous changes --- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 051854d8bc1..d85fb4d26a6 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -45,7 +45,7 @@ public: const MergeTreeReaderSettings & reader_settings_, bool use_uncompressed_cache_, const Names & virt_column_names_ = {}, - std::optional extension = {}); + std::optional extension_ = {}); ~MergeTreeBaseSelectProcessor() override; From 60c96072e1d07edb7aac36800a556988ca7a9fb8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 06:37:19 +0100 Subject: [PATCH 31/80] Miscellaneous changes --- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index d85fb4d26a6..99a5a2beddb 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -189,7 +189,7 @@ private: /// It won't work with reading in order or reading in reverse order, because we can possibly seek back. bool getDelayedTasks(); - /// It will form a request a request to coordinator and + /// It will form a request to coordinator and /// then reinitialize the mark ranges of this->task object Status performRequestToCoordinator(MarkRanges requested_ranges, bool delayed); From 85dc692f7ca4e9bcf33f910e762e6870aa14fff5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 06:37:55 +0100 Subject: [PATCH 32/80] Miscellaneous changes --- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 99a5a2beddb..e385f5f4d25 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -194,7 +194,6 @@ private: Status performRequestToCoordinator(MarkRanges requested_ranges, bool delayed); void splitCurrentTaskRangesAndFillBuffer(); - }; } From 4966556f4732d37a996d8586a15778a61e92450a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 06:41:51 +0100 Subject: [PATCH 33/80] Miscellaneous changes --- src/Storages/StorageMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index a450a9ef3a9..22e416384aa 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -227,7 +227,7 @@ void StorageMergeTree::read( bool enable_parallel_reading = local_context->getClientInfo().collaborate_with_initiator; if (enable_parallel_reading) - LOG_TRACE(log, "Parallel reading from replicas enabled {}", enable_parallel_reading); + LOG_TRACE(log, "Parallel reading from replicas enabled: {}", enable_parallel_reading); if (auto plan = reader.read( column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams, processed_stage, nullptr, enable_parallel_reading)) From cc1855ea33657ffa109056a297b698a4102b4bb8 Mon Sep 17 00:00:00 
2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 06:45:52 +0100 Subject: [PATCH 34/80] Miscellaneous changes --- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index b63e08b733d..3612b7d47a1 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -692,7 +692,7 @@ MergeTreeBaseSelectProcessor::Status MergeTreeBaseSelectProcessor::performReques .mark_ranges = std::move(requested_ranges) }; - /// Constistent hashing won't work with reading in order, because at the end of the execution + /// Consistent hashing won't work with reading in order, because at the end of the execution /// we could possibly seek back if (!delayed && canUseConsistentHashingForParallelReading()) { From 53ee7446dc88c02835b87565d78b43062cf4c242 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 06:46:37 +0100 Subject: [PATCH 35/80] Miscellaneous changes --- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 3612b7d47a1..38ca103f4ce 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -669,12 +669,11 @@ MergeTreeBaseSelectProcessor::Status MergeTreeBaseSelectProcessor::performReques if (task->data_part->isProjectionPart()) { part_name = task->data_part->getParentPart()->name; - projection_name = task->data_part->name; + projection_name = task->data_part->name; } else { part_name = task->data_part->name; - projection_name = ""; } PartBlockRange block_range From 15202833b1868fcd96e553d8ee98603cc87af3fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 06:48:49 +0100 Subject: [PATCH 36/80] Miscellaneous changes --- src/Storages/MergeTree/RequestResponse.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/RequestResponse.h b/src/Storages/MergeTree/RequestResponse.h index 85c8f7181af..fba0071727d 100644 --- a/src/Storages/MergeTree/RequestResponse.h +++ b/src/Storages/MergeTree/RequestResponse.h @@ -14,7 +14,7 @@ namespace DB { -/// Represents a segment [left; right] +/// Represents a segment [left; right] of part's block numbers. struct PartBlockRange { Int64 begin; From a77032fafa1c87ce5fe70e3556142f928b8bb84d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 06:54:58 +0100 Subject: [PATCH 37/80] Miscellaneous changes --- src/Common/Stopwatch.h | 1 + .../ParallelReplicasReadingCoordinator.cpp | 24 ++++++------------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/src/Common/Stopwatch.h b/src/Common/Stopwatch.h index cabc6d8ba1e..e17f3b18b5c 100644 --- a/src/Common/Stopwatch.h +++ b/src/Common/Stopwatch.h @@ -63,6 +63,7 @@ private: using StopwatchUniquePtr = std::unique_ptr; +/// Allows to atomically compare the elapsed time with a threshold and restart the watch if the elapsed time is not less. 
class AtomicStopwatch { public: diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 730f9a05814..7802b650a30 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -1,24 +1,14 @@ #include -#include -#include -#include -#include -#include #include -#include -#include -#include - #include -#include #include #include -#include "IO/WriteBufferFromString.h" -#include +#include #include + namespace DB { @@ -45,15 +35,15 @@ public: PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(PartitionReadRequest request) { - AtomicStopwatch watch; + Stopwatch watch; + SCOPE_EXIT({ + LOG_TRACE(&Poco::Logger::get("ParallelReplicasReadingCoordinator"), "Time for handling request: {} ns", watch.elapsed()); + }); + std::lock_guard lock(mutex); auto partition_it = partitions.find(request.partition_id); - SCOPE_EXIT({ - LOG_TRACE(&Poco::Logger::get("ParallelReplicasReadingCoordinator"), "Time for handling request: {}ns", watch.elapsed()); - }); - PartToRead::PartAndProjectionNames part_and_projection { .part = request.part_name, From 8e87e141a4158e401c232a127292ea4ce12f88d5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 06:57:53 +0100 Subject: [PATCH 38/80] Miscellaneous changes --- src/Common/Stopwatch.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/Stopwatch.h b/src/Common/Stopwatch.h index e17f3b18b5c..32d1fca337d 100644 --- a/src/Common/Stopwatch.h +++ b/src/Common/Stopwatch.h @@ -63,6 +63,7 @@ private: using StopwatchUniquePtr = std::unique_ptr; +/// Allows to obtain the elapsed time concurrently with restarting the stopwatch. /// Allows to atomically compare the elapsed time with a threshold and restart the watch if the elapsed time is not less. 
class AtomicStopwatch { From df01689ccc2770fd34c6c76240605ca92b254888 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 07:13:42 +0100 Subject: [PATCH 39/80] Better logs --- .../ParallelReplicasReadingCoordinator.cpp | 13 ++++++++--- src/Storages/MergeTree/RequestResponse.cpp | 23 +++++++++++++++++++ src/Storages/MergeTree/RequestResponse.h | 3 +++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 7802b650a30..0f772a0fb94 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -35,10 +35,8 @@ public: PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(PartitionReadRequest request) { + auto * log = &Poco::Logger::get("ParallelReplicasReadingCoordinator"); Stopwatch watch; - SCOPE_EXIT({ - LOG_TRACE(&Poco::Logger::get("ParallelReplicasReadingCoordinator"), "Time for handling request: {} ns", watch.elapsed()); - }); std::lock_guard lock(mutex); @@ -70,6 +68,7 @@ PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(Pa partition_reading.mark_ranges_in_part.insert({part_and_projection, std::move(mark_ranges_index)}); partitions.insert({request.partition_id, std::move(partition_reading)}); + LOG_TRACE(log, "Request is first in partition, accepted in {} ns: {}", watch.elapsed(), request.toString()); return {.denied = false, .mark_ranges = std::move(request.mark_ranges)}; } @@ -85,6 +84,7 @@ PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(Pa { case PartSegments::IntersectionResult::REJECT: { + LOG_TRACE(log, "Request rejected in {} ns: {}", watch.elapsed(), request.toString()); return {.denied = true, .mark_ranges = {}}; } case PartSegments::IntersectionResult::EXACTLY_ONE_INTERSECTION: @@ -100,6 +100,12 @@ PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(Pa auto result_ranges = result.convertToMarkRangesFinal(); const bool denied = result_ranges.empty(); + + if (denied) + LOG_TRACE(log, "Request rejected due to intersection in {} ns: {}", watch.elapsed(), request.toString()); + else + LOG_TRACE(log, "Request accepted partially in {} ns: {}", watch.elapsed(), request.toString()); + return {.denied = denied, .mark_ranges = std::move(result_ranges)}; } case PartSegments::IntersectionResult::NO_INTERSECTION: @@ -111,6 +117,7 @@ PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(Pa ); partition_reading.mark_ranges_in_part.insert({part_and_projection, std::move(mark_ranges_index)}); + LOG_TRACE(log, "Request accepted in {} ns: {}", watch.elapsed(), request.toString()); return {.denied = false, .mark_ranges = std::move(request.mark_ranges)}; } } diff --git a/src/Storages/MergeTree/RequestResponse.cpp b/src/Storages/MergeTree/RequestResponse.cpp index f4e09190596..c7fbaf1e3c2 100644 --- a/src/Storages/MergeTree/RequestResponse.cpp +++ b/src/Storages/MergeTree/RequestResponse.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -62,6 +63,28 @@ void PartitionReadRequest::serialize(WriteBuffer & out) const } +String PartitionReadRequest::toString() const +{ + WriteBufferFromOwnString out; + out << "partition: " << partition_id << ", part: " << part_name; + if (!projection_name.empty()) + out << ", projection: " << projection_name; + out << ", block range: [" << block_range.begin << ", " << block_range.end 
<< "]"; + out << ", mark ranges: "; + + bool is_first = true; + for (const auto & [begin, end] : mark_ranges) + { + if (!is_first) + out << ", "; + out << "[" << begin << ", " << end << ")"; + is_first = false; + } + + return out.str(); +} + + void PartitionReadRequest::describe(WriteBuffer & out) const { String result; diff --git a/src/Storages/MergeTree/RequestResponse.h b/src/Storages/MergeTree/RequestResponse.h index fba0071727d..c8fe81cd2cd 100644 --- a/src/Storages/MergeTree/RequestResponse.h +++ b/src/Storages/MergeTree/RequestResponse.h @@ -39,6 +39,9 @@ struct PartitionReadRequest void deserialize(ReadBuffer & in); UInt64 getConsistentHash(size_t buckets) const; + + /// Describe it for debugging purposes. + String toString() const; }; struct PartitionReadResponse From 99f84d8095e226ac7571333dcc64523085321c5d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 07:22:02 +0100 Subject: [PATCH 40/80] Better logs --- .../ParallelReplicasReadingCoordinator.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 0f772a0fb94..009127046a2 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -37,7 +37,8 @@ PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(Pa { auto * log = &Poco::Logger::get("ParallelReplicasReadingCoordinator"); Stopwatch watch; - + + String request_description = request_description; std::lock_guard lock(mutex); auto partition_it = partitions.find(request.partition_id); @@ -68,7 +69,7 @@ PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(Pa partition_reading.mark_ranges_in_part.insert({part_and_projection, std::move(mark_ranges_index)}); partitions.insert({request.partition_id, std::move(partition_reading)}); - LOG_TRACE(log, "Request is first in partition, accepted in {} ns: {}", watch.elapsed(), request.toString()); + LOG_TRACE(log, "Request is first in partition, accepted in {} ns: {}", watch.elapsed(), request_description); return {.denied = false, .mark_ranges = std::move(request.mark_ranges)}; } @@ -84,7 +85,7 @@ PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(Pa { case PartSegments::IntersectionResult::REJECT: { - LOG_TRACE(log, "Request rejected in {} ns: {}", watch.elapsed(), request.toString()); + LOG_TRACE(log, "Request rejected in {} ns: {}", watch.elapsed(), request_description); return {.denied = true, .mark_ranges = {}}; } case PartSegments::IntersectionResult::EXACTLY_ONE_INTERSECTION: @@ -102,9 +103,9 @@ PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(Pa const bool denied = result_ranges.empty(); if (denied) - LOG_TRACE(log, "Request rejected due to intersection in {} ns: {}", watch.elapsed(), request.toString()); + LOG_TRACE(log, "Request rejected due to intersection in {} ns: {}", watch.elapsed(), request_description); else - LOG_TRACE(log, "Request accepted partially in {} ns: {}", watch.elapsed(), request.toString()); + LOG_TRACE(log, "Request accepted partially in {} ns: {}", watch.elapsed(), request_description); return {.denied = denied, .mark_ranges = std::move(result_ranges)}; } @@ -117,7 +118,7 @@ PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(Pa ); partition_reading.mark_ranges_in_part.insert({part_and_projection, 
std::move(mark_ranges_index)}); - LOG_TRACE(log, "Request accepted in {} ns: {}", watch.elapsed(), request.toString()); + LOG_TRACE(log, "Request accepted in {} ns: {}", watch.elapsed(), request_description); return {.denied = false, .mark_ranges = std::move(request.mark_ranges)}; } } From 582db233c46357a0c172a741936edd53d7e4d659 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 07:22:17 +0100 Subject: [PATCH 41/80] Better logs --- src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 009127046a2..54b96ee2015 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -38,7 +38,7 @@ PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(Pa auto * log = &Poco::Logger::get("ParallelReplicasReadingCoordinator"); Stopwatch watch; - String request_description = request_description; + String request_description = request.toString(); std::lock_guard lock(mutex); auto partition_it = partitions.find(request.partition_id); From 9c48664f11fb69023df3b8429bb10575111819c4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 07:48:50 +0100 Subject: [PATCH 42/80] Better logs --- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 38ca103f4ce..227a5c2a0ca 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -690,6 +690,7 @@ MergeTreeBaseSelectProcessor::Status MergeTreeBaseSelectProcessor::performReques .block_range = std::move(block_range), .mark_ranges = std::move(requested_ranges) }; + String request_description = request.toString(); /// Consistent hashing won't work with reading in order, because at the end of the execution /// we could possibly seek back @@ -701,6 +702,7 @@ MergeTreeBaseSelectProcessor::Status MergeTreeBaseSelectProcessor::performReques auto delayed_task = std::make_unique(*task); // Create a copy delayed_task->mark_ranges = std::move(request.mark_ranges); delayed_tasks.emplace_back(std::move(delayed_task)); + LOG_TRACE(log, "Request delayed by hash: {}", request_description); return Status::Denied; } } @@ -708,17 +710,24 @@ MergeTreeBaseSelectProcessor::Status MergeTreeBaseSelectProcessor::performReques auto optional_response = extension.value().callback(std::move(request)); if (!optional_response.has_value()) + { + LOG_TRACE(log, "Request cancelled: {}", request_description); return Status::Cancelled; + } auto response = optional_response.value(); task->mark_ranges = std::move(response.mark_ranges); if (response.denied || task->mark_ranges.empty()) + { + LOG_TRACE(log, "Request rejected: {}", request_description); return Status::Denied; + } finalizeNewTask(); + LOG_TRACE(log, "Request accepted: {}", request_description); return Status::Accepted; } From 17693de05391ea86f57f3d9514d77cf9b974cca0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 08:01:48 +0100 Subject: [PATCH 43/80] Better logs --- src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 54b96ee2015..e07f19fb64c 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -37,7 +37,7 @@ PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(Pa { auto * log = &Poco::Logger::get("ParallelReplicasReadingCoordinator"); Stopwatch watch; - + String request_description = request.toString(); std::lock_guard lock(mutex); From 4edd8b013593ddfa559608a40533eab839f4a235 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Nov 2022 08:07:27 +0100 Subject: [PATCH 44/80] Remove unused method --- src/Storages/MergeTree/RequestResponse.cpp | 14 -------------- src/Storages/MergeTree/RequestResponse.h | 1 - 2 files changed, 15 deletions(-) diff --git a/src/Storages/MergeTree/RequestResponse.cpp b/src/Storages/MergeTree/RequestResponse.cpp index c7fbaf1e3c2..2ea6b0c9f9f 100644 --- a/src/Storages/MergeTree/RequestResponse.cpp +++ b/src/Storages/MergeTree/RequestResponse.cpp @@ -85,20 +85,6 @@ String PartitionReadRequest::toString() const } -void PartitionReadRequest::describe(WriteBuffer & out) const -{ - String result; - result += fmt::format("partition_id: {} \n", partition_id); - result += fmt::format("part_name: {} \n", part_name); - result += fmt::format("projection_name: {} \n", projection_name); - result += fmt::format("block_range: ({}, {}) \n", block_range.begin, block_range.end); - result += "mark_ranges: "; - for (const auto & range : mark_ranges) - result += fmt::format("({}, {}) ", range.begin, range.end); - result += '\n'; - out.write(result.c_str(), result.size()); -} - void PartitionReadRequest::deserialize(ReadBuffer & in) { UInt64 version; diff --git a/src/Storages/MergeTree/RequestResponse.h b/src/Storages/MergeTree/RequestResponse.h index c8fe81cd2cd..ce9dc55f479 100644 --- a/src/Storages/MergeTree/RequestResponse.h +++ b/src/Storages/MergeTree/RequestResponse.h @@ -35,7 +35,6 @@ struct PartitionReadRequest MarkRanges mark_ranges; void serialize(WriteBuffer & out) const; - void describe(WriteBuffer & out) const; void deserialize(ReadBuffer & in); UInt64 getConsistentHash(size_t buckets) const; From 986e9a60daa24ac6682e92ccf0247969d1dc1029 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 10 Nov 2022 13:03:54 +0100 Subject: [PATCH 45/80] Analyzer aggregation totals crash fix --- src/Planner/Planner.cpp | 22 +++++++++---------- ...regation_totals_rollup_crash_fix.reference | 8 +++++++ ...er_aggregation_totals_rollup_crash_fix.sql | 5 +++++ 3 files changed, 24 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.reference create mode 100644 tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.sql diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 56cc73456ce..28be1a83088 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -498,17 +498,6 @@ void Planner::buildQueryPlanIfNeeded() should_produce_results_in_order_of_bucket_number); query_plan.addStep(std::move(aggregating_step)); - if (query_node.isGroupByWithRollup()) - { - auto rollup_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings.group_by_use_nulls); - query_plan.addStep(std::move(rollup_step)); - } - else if (query_node.isGroupByWithCube()) - { - auto cube_step = 
std::make_unique(query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings.group_by_use_nulls); - query_plan.addStep(std::move(cube_step)); - } - if (query_node.isGroupByWithTotals()) { const auto & having_analysis_result = expression_analysis_result.getHaving(); @@ -528,6 +517,17 @@ void Planner::buildQueryPlanIfNeeded() query_plan.addStep(std::move(totals_having_step)); } + + if (query_node.isGroupByWithRollup()) + { + auto rollup_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings.group_by_use_nulls); + query_plan.addStep(std::move(rollup_step)); + } + else if (query_node.isGroupByWithCube()) + { + auto cube_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings.group_by_use_nulls); + query_plan.addStep(std::move(cube_step)); + } } if (!having_executed && expression_analysis_result.hasHaving()) diff --git a/tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.reference b/tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.reference new file mode 100644 index 00000000000..7c5d87e1389 --- /dev/null +++ b/tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.reference @@ -0,0 +1,8 @@ +0 +0 + +0 +((0.0001)) 0 +((0.0001)) 0 + +((0.0001)) 0 diff --git a/tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.sql b/tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.sql new file mode 100644 index 00000000000..6cd3e6a9385 --- /dev/null +++ b/tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.sql @@ -0,0 +1,5 @@ +SET allow_experimental_analyzer = 1; + +SELECT anyLast(number) FROM numbers(1) GROUP BY number WITH ROLLUP WITH TOTALS; + +SELECT tuple(tuple(0.0001)), anyLast(number) FROM numbers(1) GROUP BY number WITH ROLLUP WITH TOTALS; From 769cef94588c8e0e97a807ae1ede657561fdfdfe Mon Sep 17 00:00:00 2001 From: Alfonso Martinez Date: Mon, 14 Nov 2022 11:51:50 +0100 Subject: [PATCH 46/80] Replaced str with chars directly --- src/Functions/FunctionsJSON.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index 0973c1f3e39..2234c582ba6 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -1424,9 +1424,8 @@ public: if (chars.size() > col_str.getN()) return false; - std::string str = reinterpret_cast(chars.data()); - - col_str.insertData(str.data(), str.size()); + chars.resize_fill(col_str.getN()); + col_str.insertData(reinterpret_cast(chars.data()), chars.size()); return true; @@ -1443,9 +1442,9 @@ public: traverse(element, buf); buf.finalize(); - for (unsigned long i = 0; i < fixed_length - chars.size(); ++i) - chars.push_back(0); - + if (chars.size() > fixed_length) + return false; + chars.resize_fill(fixed_length); assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); return true; From 28fec44ffcf7d27dfea25206538aae02cde6558f Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 14 Nov 2022 20:45:28 +0000 Subject: [PATCH 47/80] Fix merging null values in AggregateFunctionSumMap --- .../AggregateFunctionSumMap.h | 17 ++- src/Common/FieldVisitorSum.cpp | 7 +- .../02480_max_map_null_totals.reference | 119 ++++++++++++++++++ .../0_stateless/02480_max_map_null_totals.sql | 39 ++++++ 4 files changed, 178 insertions(+), 4 deletions(-) create mode 100644 
tests/queries/0_stateless/02480_max_map_null_totals.reference create mode 100644 tests/queries/0_stateless/02480_max_map_null_totals.sql diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index 6a8fc9e99d8..cee59fcc907 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -202,7 +202,7 @@ public: auto & merged_maps = this->data(place).merged_maps; for (size_t col = 0, size = values_types.size(); col < size; ++col) { - const auto & array_column = assert_cast(*columns[col + 1]); + const auto & array_column = assert_cast(*columns[col + 1]); const IColumn & value_column = array_column.getData(); const IColumn::Offsets & offsets = array_column.getOffsets(); const size_t values_vec_offset = offsets[row_num - 1]; @@ -532,7 +532,12 @@ private: public: explicit FieldVisitorMax(const Field & rhs_) : rhs(rhs_) {} - bool operator() (Null &) const { throw Exception("Cannot compare Nulls", ErrorCodes::LOGICAL_ERROR); } + bool operator() (Null &) const + { + /// Do not update current value, skip nulls + return false; + } + bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot compare AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); } bool operator() (Array & x) const { return compareImpl(x); } @@ -567,7 +572,13 @@ private: public: explicit FieldVisitorMin(const Field & rhs_) : rhs(rhs_) {} - bool operator() (Null &) const { throw Exception("Cannot compare Nulls", ErrorCodes::LOGICAL_ERROR); } + + bool operator() (Null &) const + { + /// Do not update current value, skip nulls + return false; + } + bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot sum AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); } bool operator() (Array & x) const { return compareImpl(x); } diff --git a/src/Common/FieldVisitorSum.cpp b/src/Common/FieldVisitorSum.cpp index 2c404c33177..332b1a4255b 100644 --- a/src/Common/FieldVisitorSum.cpp +++ b/src/Common/FieldVisitorSum.cpp @@ -21,7 +21,12 @@ bool FieldVisitorSum::operator() (UInt64 & x) const bool FieldVisitorSum::operator() (Float64 & x) const { x += rhs.get(); return x != 0; } -bool FieldVisitorSum::operator() (Null &) const { throw Exception("Cannot sum Nulls", ErrorCodes::LOGICAL_ERROR); } +bool FieldVisitorSum::operator() (Null &) const +{ + /// Do not add anything + return rhs != 0; +} + bool FieldVisitorSum::operator() (String &) const { throw Exception("Cannot sum Strings", ErrorCodes::LOGICAL_ERROR); } bool FieldVisitorSum::operator() (Array &) const { throw Exception("Cannot sum Arrays", ErrorCodes::LOGICAL_ERROR); } bool FieldVisitorSum::operator() (Tuple &) const { throw Exception("Cannot sum Tuples", ErrorCodes::LOGICAL_ERROR); } diff --git a/tests/queries/0_stateless/02480_max_map_null_totals.reference b/tests/queries/0_stateless/02480_max_map_null_totals.reference new file mode 100644 index 00000000000..5cc9b5a495f --- /dev/null +++ b/tests/queries/0_stateless/02480_max_map_null_totals.reference @@ -0,0 +1,119 @@ +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) + +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) + +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([0],[0]) +([2],[2]) +([1],[1]) + +([0,2],[0,2]) 
+([0],[0]) +([2],[2]) +([1],[1]) +([0,2],[0,2]) +([0],[0]) +([2],[2]) +([1],[1]) +([0,2],[0,2]) +- +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) + +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) + +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([0],[0]) +([2],[2]) +([1],[1]) + +([0,2],[0,2]) +([0],[0]) +([2],[2]) +([1],[1]) +([0,2],[0,2]) +([0],[0]) +([2],[2]) +([1],[1]) +([0,2],[0,2]) +- +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) + +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) + +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([0],[0]) +([2],[2]) +([1],[1]) + +([0,2],[0,2]) +([0],[0]) +([2],[2]) +([1],[1]) +([0,2],[0,2]) +([0],[0]) +([2],[2]) +([1],[1]) +([0,2],[0,2]) diff --git a/tests/queries/0_stateless/02480_max_map_null_totals.sql b/tests/queries/0_stateless/02480_max_map_null_totals.sql new file mode 100644 index 00000000000..81e2a5c4243 --- /dev/null +++ b/tests/queries/0_stateless/02480_max_map_null_totals.sql @@ -0,0 +1,39 @@ +SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT '-'; + +SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT '-'; + 
+SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; From 5eba20e1198b0bcd3fa91caa2a597e53cd5432fd Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 15 Nov 2022 09:46:24 +0800 Subject: [PATCH 48/80] modify return type from Int64 to UInt64 --- docs/en/sql-reference/functions/math-functions.md | 2 +- src/Functions/factorial.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 7f349686d8b..47c27268b09 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -553,7 +553,7 @@ Result: ## factorial(n) -Computes the factorial of an integer value. It works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is Int64. +Computes the factorial of an integer value. It works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is UInt64. The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20, a value of 21 or greater overflows the range for Int64 and will cause exception throw. diff --git a/src/Functions/factorial.cpp b/src/Functions/factorial.cpp index 5c46b97c193..b76ef90a48d 100644 --- a/src/Functions/factorial.cpp +++ b/src/Functions/factorial.cpp @@ -15,7 +15,7 @@ namespace ErrorCodes template struct FactorialImpl { - using ResultType = Int64; + using ResultType = UInt64; static const constexpr bool allow_decimal = false; static const constexpr bool allow_fixed_string = false; static const constexpr bool allow_string_integer = false; @@ -101,7 +101,7 @@ REGISTER_FUNCTION(Factorial) factory.registerFunction( { R"( -Computes the factorial of an integer value. It works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is Int64. +Computes the factorial of an integer value. It works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is UInt64. The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20, a value of 21 or greater overflows the range for Int64 and will cause exception throw. 
)", From 0017416069fa23e1e530d6a56197669113657489 Mon Sep 17 00:00:00 2001 From: Vladimir C Date: Tue, 15 Nov 2022 11:55:54 +0100 Subject: [PATCH 49/80] Update src/Common/FieldVisitorSum.cpp --- src/Common/FieldVisitorSum.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/FieldVisitorSum.cpp b/src/Common/FieldVisitorSum.cpp index 332b1a4255b..db7b4850204 100644 --- a/src/Common/FieldVisitorSum.cpp +++ b/src/Common/FieldVisitorSum.cpp @@ -24,7 +24,7 @@ bool FieldVisitorSum::operator() (Float64 & x) const { x += rhs.get(); bool FieldVisitorSum::operator() (Null &) const { /// Do not add anything - return rhs != 0; + return false; } bool FieldVisitorSum::operator() (String &) const { throw Exception("Cannot sum Strings", ErrorCodes::LOGICAL_ERROR); } From 2a9ab046ddf44b040ad0a5a923b2381ed9b8778b Mon Sep 17 00:00:00 2001 From: Christoph Wurm Date: Tue, 15 Nov 2022 11:51:14 +0000 Subject: [PATCH 50/80] Unify spelling of DateTime --- docs/en/operations/system-tables/crash-log.md | 4 ++-- docs/en/operations/system-tables/mutations.md | 4 ++-- docs/en/operations/system-tables/replication_queue.md | 6 +++--- docs/en/sql-reference/data-types/date32.md | 2 +- docs/en/sql-reference/data-types/datetime.md | 2 +- docs/en/sql-reference/data-types/datetime64.md | 2 +- docs/en/sql-reference/functions/date-time-functions.md | 8 ++++---- docs/ru/operations/system-tables/crash-log.md | 4 ++-- docs/ru/operations/system-tables/mutations.md | 4 ++-- docs/ru/operations/system-tables/replication_queue.md | 6 +++--- docs/ru/sql-reference/data-types/date32.md | 2 +- docs/ru/sql-reference/functions/date-time-functions.md | 4 ++-- docs/zh/operations/system-tables/crash-log.md | 4 ++-- docs/zh/operations/system-tables/mutations.md | 4 ++-- docs/zh/operations/system-tables/replication_queue.md | 6 +++--- docs/zh/sql-reference/ansi.md | 2 +- docs/zh/sql-reference/data-types/datetime64.md | 2 +- docs/zh/sql-reference/functions/date-time-functions.md | 4 ++-- 18 files changed, 35 insertions(+), 35 deletions(-) diff --git a/docs/en/operations/system-tables/crash-log.md b/docs/en/operations/system-tables/crash-log.md index 0c0a4cd967d..a44b0db8e9b 100644 --- a/docs/en/operations/system-tables/crash-log.md +++ b/docs/en/operations/system-tables/crash-log.md @@ -7,8 +7,8 @@ Contains information about stack traces for fatal errors. The table does not exi Columns: -- `event_date` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date of the event. -- `event_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Time of the event. +- `event_date` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date of the event. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the event. - `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the event with nanoseconds. - `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Signal number. - `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread ID. diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md index 782d7c42ad2..0d3b764846b 100644 --- a/docs/en/operations/system-tables/mutations.md +++ b/docs/en/operations/system-tables/mutations.md @@ -15,7 +15,7 @@ Columns: - `command` ([String](/docs/en/sql-reference/data-types/string.md)) — The mutation command string (the part of the query after `ALTER TABLE [db.]table`). 
-- `create_time` ([Datetime](/docs/en/sql-reference/data-types/datetime.md)) — Date and time when the mutation command was submitted for execution. +- `create_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — Date and time when the mutation command was submitted for execution. - `block_numbers.partition_id` ([Array](/docs/en/sql-reference/data-types/array.md)([String](/docs/en/sql-reference/data-types/string.md))) — For mutations of replicated tables, the array contains the partitions' IDs (one record for each partition). For mutations of non-replicated tables the array is empty. @@ -39,7 +39,7 @@ If there were problems with mutating some data parts, the following columns cont - `latest_failed_part` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the most recent part that could not be mutated. -- `latest_fail_time` ([Datetime](/docs/en/sql-reference/data-types/datetime.md)) — The date and time of the most recent part mutation failure. +- `latest_fail_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — The date and time of the most recent part mutation failure. - `latest_fail_reason` ([String](/docs/en/sql-reference/data-types/string.md)) — The exception message that caused the most recent part mutation failure. diff --git a/docs/en/operations/system-tables/replication_queue.md b/docs/en/operations/system-tables/replication_queue.md index ced20b0048a..dff3bce246a 100644 --- a/docs/en/operations/system-tables/replication_queue.md +++ b/docs/en/operations/system-tables/replication_queue.md @@ -29,7 +29,7 @@ Columns: - `MUTATE_PART` — Apply one or several mutations to the part. - `ALTER_METADATA` — Apply alter modification according to global /metadata and /columns paths. -- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was submitted for execution. +- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was submitted for execution. - `required_quorum` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of replicas waiting for the task to complete with confirmation of completion. This column is only relevant for the `GET_PARTS` task. @@ -47,13 +47,13 @@ Columns: - `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text message about the last error that occurred (if any). -- `last_attempt_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last attempted. +- `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last attempted. - `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of postponed tasks. - `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — The reason why the task was postponed. -- `last_postpone_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last postponed. +- `last_postpone_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last postponed. - `merge_type` ([String](../../sql-reference/data-types/string.md)) — Type of the current merge. Empty if it's a mutation. 
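
(The columns renamed in the hunks above are ordinary `DateTime` values, so the documented types are easy to inspect directly. A minimal illustrative sketch, assuming a server that has recorded at least one mutation; only columns named in the hunks above are used.)

```sql
-- create_time and latest_fail_time are the DateTime columns documented in system.mutations.
SELECT
    command,
    create_time,
    latest_fail_time,
    toTypeName(create_time) AS create_time_type
FROM system.mutations
LIMIT 5;
```
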
diff --git a/docs/en/sql-reference/data-types/date32.md b/docs/en/sql-reference/data-types/date32.md index ff1a745785b..c8c7470d2cb 100644 --- a/docs/en/sql-reference/data-types/date32.md +++ b/docs/en/sql-reference/data-types/date32.md @@ -6,7 +6,7 @@ sidebar_label: Date32 # Date32 -A date. Supports the date range same with [Datetime64](../../sql-reference/data-types/datetime64.md). Stored in four bytes as the number of days since 1900-01-01. Allows storing values till 2299-12-31. +A date. Supports the date range same with [DateTime64](../../sql-reference/data-types/datetime64.md). Stored in four bytes as the number of days since 1900-01-01. Allows storing values till 2299-12-31. **Examples** diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index 85587882e01..7f7f21ded54 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -4,7 +4,7 @@ sidebar_position: 48 sidebar_label: DateTime --- -# Datetime +# DateTime Allows to store an instant in time, that can be expressed as a calendar date and a time of a day. diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md index c7372e4b064..fa3a1eecd46 100644 --- a/docs/en/sql-reference/data-types/datetime64.md +++ b/docs/en/sql-reference/data-types/datetime64.md @@ -4,7 +4,7 @@ sidebar_position: 49 sidebar_label: DateTime64 --- -# Datetime64 +# DateTime64 Allows to store an instant in time, that can be expressed as a calendar date and a time of a day, with defined sub-second precision diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index f7ea2690b21..6156a823d58 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -550,7 +550,7 @@ Alias: `dateTrunc`. - Value, truncated to the specified part of date. -Type: [Datetime](../../sql-reference/data-types/datetime.md). +Type: [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -881,7 +881,7 @@ now([timezone]) - Current date and time. -Type: [Datetime](../../sql-reference/data-types/datetime.md). +Type: [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -932,7 +932,7 @@ now64([scale], [timezone]) - Current date and time with sub-second precision. -Type: [Datetime64](../../sql-reference/data-types/datetime64.md). +Type: [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -968,7 +968,7 @@ nowInBlock([timezone]) - Current date and time at the moment of processing of each block of data. -Type: [Datetime](../../sql-reference/data-types/datetime.md). +Type: [DateTime](../../sql-reference/data-types/datetime.md). **Example** diff --git a/docs/ru/operations/system-tables/crash-log.md b/docs/ru/operations/system-tables/crash-log.md index 4ca8be5a199..68148fec6bd 100644 --- a/docs/ru/operations/system-tables/crash-log.md +++ b/docs/ru/operations/system-tables/crash-log.md @@ -7,8 +7,8 @@ slug: /ru/operations/system-tables/crash-log Колонки: -- `event_date` ([Datetime](../../sql-reference/data-types/datetime.md)) — Дата события. -- `event_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Время события. +- `event_date` ([DateTime](../../sql-reference/data-types/datetime.md)) — Дата события. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Время события. 
- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Время события с наносекундами. - `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Номер сигнала, пришедшего в поток. - `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Идентификатор треда. diff --git a/docs/ru/operations/system-tables/mutations.md b/docs/ru/operations/system-tables/mutations.md index 20e4ebfdaf1..bb0bd44ed7a 100644 --- a/docs/ru/operations/system-tables/mutations.md +++ b/docs/ru/operations/system-tables/mutations.md @@ -15,7 +15,7 @@ slug: /ru/operations/system-tables/mutations - `command` ([String](../../sql-reference/data-types/string.md)) — команда мутации (часть запроса после `ALTER TABLE [db.]table`). -- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — дата и время создания мутации. +- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время создания мутации. - `block_numbers.partition_id` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Для мутаций реплицированных таблиц массив содержит содержит номера партиций (по одной записи для каждой партиции). Для мутаций нереплицированных таблиц массив пустой. @@ -39,7 +39,7 @@ slug: /ru/operations/system-tables/mutations - `latest_failed_part` ([String](../../sql-reference/data-types/string.md)) — имя последнего куска, мутация которого не удалась. -- `latest_fail_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — дата и время последней ошибки мутации. +- `latest_fail_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время последней ошибки мутации. - `latest_fail_reason` ([String](../../sql-reference/data-types/string.md)) — причина последней ошибки мутации. diff --git a/docs/ru/operations/system-tables/replication_queue.md b/docs/ru/operations/system-tables/replication_queue.md index 25de174e98f..60d42133153 100644 --- a/docs/ru/operations/system-tables/replication_queue.md +++ b/docs/ru/operations/system-tables/replication_queue.md @@ -29,7 +29,7 @@ slug: /ru/operations/system-tables/replication_queue - `MUTATE_PART` — применить одну или несколько мутаций к куску. - `ALTER_METADATA` — применить изменения структуры таблицы в результате запросов с выражением `ALTER`. -- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — дата и время отправки задачи на выполнение. +- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время отправки задачи на выполнение. - `required_quorum` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество реплик, ожидающих завершения задачи, с подтверждением о завершении. Этот столбец актуален только для задачи `GET_PARTS`. @@ -47,13 +47,13 @@ slug: /ru/operations/system-tables/replication_queue - `last_exception` ([String](../../sql-reference/data-types/string.md)) — текст сообщения о последней возникшей ошибке, если таковые имеются. -- `last_attempt_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — дата и время последней попытки выполнить задачу. +- `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время последней попытки выполнить задачу. - `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество отложенных задач. - `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — причина, по которой была отложена задача. 
-- `last_postpone_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — дата и время, когда была отложена задача в последний раз. +- `last_postpone_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время, когда была отложена задача в последний раз. - `merge_type` ([String](../../sql-reference/data-types/string.md)) — тип текущего слияния. Пусто, если это мутация. diff --git a/docs/ru/sql-reference/data-types/date32.md b/docs/ru/sql-reference/data-types/date32.md index fcb7d688c20..958b8e9763e 100644 --- a/docs/ru/sql-reference/data-types/date32.md +++ b/docs/ru/sql-reference/data-types/date32.md @@ -6,7 +6,7 @@ sidebar_label: Date32 # Date32 {#data_type-datetime32} -Дата. Поддерживается такой же диапазон дат, как для типа [Datetime64](../../sql-reference/data-types/datetime64.md). Значение хранится в четырех байтах и соответствует числу дней с 1900-01-01 по 2299-12-31. +Дата. Поддерживается такой же диапазон дат, как для типа [DateTime64](../../sql-reference/data-types/datetime64.md). Значение хранится в четырех байтах и соответствует числу дней с 1900-01-01 по 2299-12-31. **Пример** diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index f18c2ea258a..f430f5cae51 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -602,7 +602,7 @@ date_trunc(unit, value[, timezone]) - Дата и время, отсеченные до указанной части. -Тип: [Datetime](../../sql-reference/data-types/datetime.md). +Тип: [DateTime](../../sql-reference/data-types/datetime.md). **Примеры** @@ -913,7 +913,7 @@ now([timezone]) - Текущие дата и время. -Тип: [Datetime](../../sql-reference/data-types/datetime.md). +Тип: [DateTime](../../sql-reference/data-types/datetime.md). **Пример** diff --git a/docs/zh/operations/system-tables/crash-log.md b/docs/zh/operations/system-tables/crash-log.md index d0ed406fa0c..06087a34f35 100644 --- a/docs/zh/operations/system-tables/crash-log.md +++ b/docs/zh/operations/system-tables/crash-log.md @@ -7,8 +7,8 @@ slug: /zh/operations/system-tables/crash-log 列信息: -- `event_date` ([Datetime](../../sql-reference/data-types/datetime.md)) — 事件日期. -- `event_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — 事件时间. +- `event_date` ([DateTime](../../sql-reference/data-types/datetime.md)) — 事件日期. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 事件时间. - `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 以纳秒为单位的事件时间戳. - `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — 信号编号. - `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 线程ID. 
diff --git a/docs/zh/operations/system-tables/mutations.md b/docs/zh/operations/system-tables/mutations.md index dbce0a59063..f5f82c1717a 100644 --- a/docs/zh/operations/system-tables/mutations.md +++ b/docs/zh/operations/system-tables/mutations.md @@ -15,7 +15,7 @@ slug: /zh/operations/system-tables/mutations - `command` ([String](../../sql-reference/data-types/string.md)) — mutation命令字符串(`ALTER TABLE [db.]table`语句之后的部分)。 -- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — mutation命令提交执行的日期和时间。 +- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — mutation命令提交执行的日期和时间。 - `block_numbers.partition_id` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 对于复制表的mutation,该数组包含分区的ID(每个分区都有一条记录)。对于非复制表的mutation,该数组为空。 @@ -39,7 +39,7 @@ slug: /zh/operations/system-tables/mutations - `latest_failed_part`([String](../../sql-reference/data-types/string.md)) — 最近不能mutation的part的名称。 -- `latest_fail_time`([Datetime](../../sql-reference/data-types/datetime.md)) — 最近的一个mutation失败的时间。 +- `latest_fail_time`([DateTime](../../sql-reference/data-types/datetime.md)) — 最近的一个mutation失败的时间。 - `latest_fail_reason`([String](../../sql-reference/data-types/string.md)) — 导致最近part的mutation失败的异常消息。 diff --git a/docs/zh/operations/system-tables/replication_queue.md b/docs/zh/operations/system-tables/replication_queue.md index e82569e378d..95a183cf9f7 100644 --- a/docs/zh/operations/system-tables/replication_queue.md +++ b/docs/zh/operations/system-tables/replication_queue.md @@ -29,7 +29,7 @@ slug: /zh/operations/system-tables/replication_queue - `MUTATE_PART` — 对分片应用一个或多个突变. - `ALTER_METADATA` — 根据全局 /metadata 和 /columns 路径应用alter修改. -- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — 提交任务执行的日期和时间. +- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 提交任务执行的日期和时间. - `required_quorum` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 等待任务完成并确认完成的副本数. 此列仅与 `GET_PARTS` 任务相关. @@ -47,13 +47,13 @@ slug: /zh/operations/system-tables/replication_queue - `last_exception` ([String](../../sql-reference/data-types/string.md)) — 发生的最后一个错误的短信(如果有). -- `last_attempt_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — 上次尝试任务的日期和时间. +- `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 上次尝试任务的日期和时间. - `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 延期任务数. - `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — 任务延期的原因. -- `last_postpone_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — 上次推迟任务的日期和时间. +- `last_postpone_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 上次推迟任务的日期和时间. - `merge_type` ([String](../../sql-reference/data-types/string.md)) — 当前合并的类型. 如果是突变则为空. 
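
(The same `DateTime` spelling applies to the Chinese pages above. The canonical type name can also be read from `system.columns`; a small sketch, assuming a running server.)

```sql
-- The documented columns report their type as DateTime.
SELECT name, type
FROM system.columns
WHERE database = 'system'
  AND table = 'replication_queue'
  AND type LIKE 'DateTime%';
```
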
diff --git a/docs/zh/sql-reference/ansi.md b/docs/zh/sql-reference/ansi.md index 9cf335f89ef..cdccee0084f 100644 --- a/docs/zh/sql-reference/ansi.md +++ b/docs/zh/sql-reference/ansi.md @@ -152,7 +152,7 @@ sidebar_label: "ANSI\u517C\u5BB9\u6027" | F051-02 | TIME(时间)数据类型(并支持用于表达时间的字面量),小数秒精度至少为0 | 否 {.text-danger} | | | F051-03 | 时间戳数据类型(并支持用于表达时间戳的字面量),小数秒精度至少为0和6 | 是 {.text-danger} | | | F051-04 | 日期、时间和时间戳数据类型的比较谓词 | 是 {.text-success} | | -| F051-05 | Datetime 类型和字符串形式表达的时间之间的显式转换 | 是 {.text-success} | | +| F051-05 | DateTime 类型和字符串形式表达的时间之间的显式转换 | 是 {.text-success} | | | F051-06 | CURRENT_DATE | 否 {.text-danger} | 使用`today()`替代 | | F051-07 | LOCALTIME | 否 {.text-danger} | 使用`now()`替代 | | F051-08 | LOCALTIMESTAMP | 否 {.text-danger} | | diff --git a/docs/zh/sql-reference/data-types/datetime64.md b/docs/zh/sql-reference/data-types/datetime64.md index ee2d7a6f258..24888645cba 100644 --- a/docs/zh/sql-reference/data-types/datetime64.md +++ b/docs/zh/sql-reference/data-types/datetime64.md @@ -6,7 +6,7 @@ sidebar_position: 49 sidebar_label: DateTime64 --- -# Datetime64 {#data_type-datetime64} +# DateTime64 {#data_type-datetime64} 此类型允许以日期(date)加时间(time)的形式来存储一个时刻的时间值,具有定义的亚秒精度 diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index c666d01d15f..4bbd0e5b69b 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -539,7 +539,7 @@ date_trunc(unit, value[, timezone]) - 按指定的单位向前取整后的DateTime。 -类型: [Datetime](../../sql-reference/data-types/datetime.md). +类型: [DateTime](../../sql-reference/data-types/datetime.md). **示例** @@ -850,7 +850,7 @@ now([timezone]) - 当前日期和时间。 -类型: [Datetime](../../sql-reference/data-types/datetime.md). +类型: [DateTime](../../sql-reference/data-types/datetime.md). 
**示例** From 0e5e58bed97db8ed66b140df2dce4976bd02e1da Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 15 Nov 2022 12:18:24 +0000 Subject: [PATCH 51/80] Remove exception if shared ID already created --- .../MergeTree/ReplicatedMergeTreeAttachThread.cpp | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index 7f91ffee1fe..47f10acb157 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -182,6 +182,7 @@ void ReplicatedMergeTreeAttachThread::runImpl() storage.createNewZooKeeperNodes(); storage.syncPinnedPartUUIDs(); + std::lock_guard lock(storage.table_shared_id_mutex); storage.createTableSharedID(); }; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index bbbb4b6d22c..8b4788c8d55 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7609,8 +7609,6 @@ std::unique_ptr StorageReplicatedMergeTree::getDefaultSetting String StorageReplicatedMergeTree::getTableSharedID() const { - /// Lock is not required in other places because createTableSharedID() - /// can be called only during table initialization std::lock_guard lock(table_shared_id_mutex); /// Can happen if table was partially initialized before drop by DatabaseCatalog @@ -7637,8 +7635,12 @@ String StorageReplicatedMergeTree::getTableSharedID() const void StorageReplicatedMergeTree::createTableSharedID() const { LOG_DEBUG(log, "Creating shared ID for table {}", getStorageID().getNameForLogs()); + // can be set by the call to getTableSharedID if (table_shared_id != UUIDHelpers::Nil) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Table shared id already initialized"); + { + LOG_INFO(log, "Shared ID already set to {}", table_shared_id); + return; + } auto zookeeper = getZooKeeper(); String zookeeper_table_id_path = fs::path(zookeeper_path) / "table_shared_id"; From d7c882951f1e3c5c66d744e9105a6d30a419a6e0 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 15 Nov 2022 12:36:28 +0000 Subject: [PATCH 52/80] Fix nullptr dereference in collectScopeValidIdentifiersForTypoCorrection --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 20 ++++++++++++++++++- .../02480_analyzer_alias_nullptr.reference | 0 .../02480_analyzer_alias_nullptr.sql | 3 +++ 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02480_analyzer_alias_nullptr.reference create mode 100644 tests/queries/0_stateless/02480_analyzer_alias_nullptr.sql diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 24b88a729be..760d036ab4f 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1517,6 +1517,7 @@ void QueryAnalyzer::collectScopeValidIdentifiersForTypoCorrection( { for (const auto & [name, expression] : scope.alias_name_to_expression_node) { + assert(expression); auto expression_identifier = Identifier(name); valid_identifiers_result.insert(expression_identifier); @@ -2170,7 +2171,24 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier auto & alias_identifier_node = it->second->as(); auto identifier = alias_identifier_node.getIdentifier(); auto lookup_result = tryResolveIdentifier(IdentifierLookup{identifier, 
identifier_lookup.lookup_context}, scope, identifier_resolve_settings); - it->second = lookup_result.resolved_identifier; + if (lookup_result.isResolved()) + { + it->second = lookup_result.resolved_identifier; + } + else + { + alias_name_to_node_map.erase(it); + + std::unordered_set valid_identifiers; + collectScopeWithParentScopesValidIdentifiersForTypoCorrection(identifier, scope, true, false, false, valid_identifiers); + + auto hints = collectIdentifierTypoHints(identifier, valid_identifiers); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown {} identifier '{}' in scope {}{}", + toStringLowercase(IdentifierLookupContext::EXPRESSION), + identifier.getFullName(), + scope.scope_node->formatASTForErrorMessage(), + getHintsErrorMessageSuffix(hints)); + } /** During collection of aliases if node is identifier and has alias, we cannot say if it is * column or function node. Check QueryExpressionsAliasVisitor documentation for clarification. diff --git a/tests/queries/0_stateless/02480_analyzer_alias_nullptr.reference b/tests/queries/0_stateless/02480_analyzer_alias_nullptr.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02480_analyzer_alias_nullptr.sql b/tests/queries/0_stateless/02480_analyzer_alias_nullptr.sql new file mode 100644 index 00000000000..f6b381e5c70 --- /dev/null +++ b/tests/queries/0_stateless/02480_analyzer_alias_nullptr.sql @@ -0,0 +1,3 @@ +SET allow_experimental_analyzer = 1; + +SELECT min(b), x AS b FROM (SELECT max(number) FROM numbers(1)); -- { serverError UNKNOWN_IDENTIFIER } From 143b67d0beecfd0f51c6d4499a0383e003baea96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 15 Nov 2022 15:40:06 +0100 Subject: [PATCH 53/80] Fix ubsan in AggregateFunctionMinMaxAny::read with high sizes --- src/AggregateFunctions/AggregateFunctionMinMaxAny.h | 7 ++++++- src/Common/Arena.h | 2 +- .../0_stateless/02481_i43247_ubsan_in_minmaxany.reference | 0 .../0_stateless/02481_i43247_ubsan_in_minmaxany.sql | 3 +++ 4 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.reference create mode 100644 tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 46be7331195..18f065caaf9 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -29,6 +29,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NOT_IMPLEMENTED; + extern const int TOO_LARGE_STRING_SIZE; } /** Aggregate functions that store one of passed values. @@ -521,7 +522,11 @@ public: { if (capacity < rhs_size) { - capacity = static_cast(roundUpToPowerOfTwoOrZero(rhs_size)); + capacity = static_cast(roundUpToPowerOfTwoOrZero(rhs_size)); + /// It might happen if the size was too big and the rounded value does not fit a size_t + if (unlikely(capacity <= rhs_size)) + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String size is too big ({})", rhs_size); + /// Don't free large_data here. large_data = arena->alloc(capacity); } diff --git a/src/Common/Arena.h b/src/Common/Arena.h index 17d53acd8f7..5772dff6bca 100644 --- a/src/Common/Arena.h +++ b/src/Common/Arena.h @@ -141,7 +141,7 @@ public: /// Get piece of memory, without alignment. 
char * alloc(size_t size) { - if (unlikely(head->pos + size > head->end)) + if (unlikely(static_cast(size) > head->end - head->pos)) addMemoryChunk(size); char * res = head->pos; diff --git a/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.reference b/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql b/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql new file mode 100644 index 00000000000..7204053de04 --- /dev/null +++ b/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql @@ -0,0 +1,3 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/43247 +SELECT finalizeAggregation(CAST('AggregateFunction(categoricalInformationValue, Nullable(UInt8), UInt8)AggregateFunction(categoricalInformationValue, Nullable(UInt8), UInt8)', + 'AggregateFunction(min, String)')); -- { serverError 131 } From d49b65cf1ead8fbd4a5d996c2d405f18ff954aca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 15 Nov 2022 16:31:12 +0100 Subject: [PATCH 54/80] Fix capacity check --- src/AggregateFunctions/AggregateFunctionMinMaxAny.h | 2 +- .../0_stateless/02481_i43247_ubsan_in_minmaxany.reference | 1 + tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 18f065caaf9..1f3c51c1c1c 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -524,7 +524,7 @@ public: { capacity = static_cast(roundUpToPowerOfTwoOrZero(rhs_size)); /// It might happen if the size was too big and the rounded value does not fit a size_t - if (unlikely(capacity <= rhs_size)) + if (unlikely(capacity < rhs_size)) throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String size is too big ({})", rhs_size); /// Don't free large_data here. diff --git a/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.reference b/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.reference index e69de29bb2d..3e3abfb9a41 100644 --- a/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.reference +++ b/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.reference @@ -0,0 +1 @@ +0123456789012345678901234567890123456789012345678901234567890123 diff --git a/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql b/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql index 7204053de04..7dc29c2daae 100644 --- a/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql +++ b/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql @@ -1,3 +1,7 @@ -- https://github.com/ClickHouse/ClickHouse/issues/43247 SELECT finalizeAggregation(CAST('AggregateFunction(categoricalInformationValue, Nullable(UInt8), UInt8)AggregateFunction(categoricalInformationValue, Nullable(UInt8), UInt8)', 'AggregateFunction(min, String)')); -- { serverError 131 } + +-- Value from hex(minState('0123456789012345678901234567890123456789012345678901234567890123')). 
Size 63 + 1 (64)
+SELECT finalizeAggregation(CAST(unhex('4000000030313233343536373839303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353637383930313233'),
+    'AggregateFunction(min, String)'));

From 5aae1d07246f6b1ce46b43cb8f59fe4495864185 Mon Sep 17 00:00:00 2001
From: DanRoscigno
Date: Tue, 15 Nov 2022 10:49:13 -0500
Subject: [PATCH 55/80] update note to include default user

---
 .../external-dictionaries/external-dicts-dict-layout.md       | 2 +-
 .../external-dictionaries/external-dicts-dict-lifetime.md     | 2 +-
 .../external-dictionaries/external-dicts-dict-polygon.md      | 2 +-
 .../external-dictionaries/external-dicts-dict-sources.md      | 2 +-
 .../external-dictionaries/external-dicts-dict-structure.md    | 2 +-
 .../dictionaries/external-dictionaries/external-dicts-dict.md | 2 +-
 .../dictionaries/external-dictionaries/external-dicts.md      | 4 ++--
 7 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
index 49a8620b609..4e379365912 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
@@ -25,7 +25,7 @@ ClickHouse generates an exception for errors with dictionaries. Examples of erro
 You can view the list of dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table.

 :::tip
-If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries.
+If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`.
 Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md).
 :::

diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
index f0d6cb64a0b..f5b70522841 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
@@ -13,7 +13,7 @@ Dictionary updates (other than loading for first use) do not block queries. Duri
 Example of settings:

 :::tip
-If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries.
+If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`.
 Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md).
 :::

diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
index 42c4a7faa73..aa85247c1be 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
@@ -11,7 +11,7 @@ For example: defining a city area by geographical coordinates.
Example of a polygon dictionary configuration: :::tip -If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries. +If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). ::: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 8218c066530..bcd5d4417a5 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -7,7 +7,7 @@ sidebar_label: Dictionary Sources # Dictionary Sources :::tip -If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries. +If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). ::: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 20056c6ac9c..ba8411db1ce 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -7,7 +7,7 @@ sidebar_label: Dictionary Key and Fields # Dictionary Key and Fields :::tip -If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries. +If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). ::: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md index f95a3593ae4..1d273a9e9ed 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md @@ -7,7 +7,7 @@ sidebar_label: Configuring a Dictionary # Configuring a Dictionary :::tip -If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries. +If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). 
::: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 19bb7474d40..34d8b7b74a4 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -7,7 +7,7 @@ sidebar_label: General Description # Dictionaries :::tip -If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries. +If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). ::: @@ -36,7 +36,7 @@ Dictionaries can be created with [DDL queries](../../../sql-reference/statements ## Creating a dictionary with a configuration file :::note -Creating a dictionary with a configuration file is not applicable to ClickHouse Cloud. Please use DDL (see above). +Creating a dictionary with a configuration file is not applicable to ClickHouse Cloud. Please use DDL (see above), and create your dictionary as user `default`. ::: The dictionary configuration file has the following format: From 03968eb694e414281ac5b0529edd2785b266ef59 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 15 Nov 2022 16:16:04 +0000 Subject: [PATCH 56/80] Better message in wait_zookeeper_to_start --- tests/integration/helpers/cluster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 666833013c8..a0e8e0b0cce 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2070,10 +2070,10 @@ class ClickHouseCluster: logging.debug("All instances of ZooKeeper started") return except Exception as ex: - logging.debug("Can't connect to ZooKeeper " + str(ex)) + logging.debug(f"Can't connect to ZooKeeper {instance}: {ex}") time.sleep(0.5) - raise Exception("Cannot wait ZooKeeper container") + raise Exception("Cannot wait ZooKeeper container (probably it's a `iptables-nft` issue, you may try to `sudo iptables -P FORWARD ACCEPT`)") def make_hdfs_api(self, timeout=180, kerberized=False): if kerberized: From aaeeeaf1fc1c58d1d96521d9c604a213cb8cd3c4 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 15 Nov 2022 16:40:01 +0000 Subject: [PATCH 57/80] Automatic style fix --- tests/integration/helpers/cluster.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index a0e8e0b0cce..a190126a8ff 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2073,7 +2073,9 @@ class ClickHouseCluster: logging.debug(f"Can't connect to ZooKeeper {instance}: {ex}") time.sleep(0.5) - raise Exception("Cannot wait ZooKeeper container (probably it's a `iptables-nft` issue, you may try to `sudo iptables -P FORWARD ACCEPT`)") + raise Exception( + "Cannot wait ZooKeeper container (probably it's a `iptables-nft` issue, you may try to `sudo iptables -P FORWARD ACCEPT`)" + ) def make_hdfs_api(self, timeout=180, kerberized=False): if kerberized: From 63ae261119da4484d02c16979b73e3231b0ef2e3 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 15 Nov 2022 12:44:54 -0500 Subject: [PATCH 58/80] move tip to snippet --- 
.../external-dictionaries/_snippet_dictionary_in_cloud.md | 4 ++++ .../external-dictionaries/external-dicts-dict-layout.md | 6 ++---- .../external-dictionaries/external-dicts-dict-lifetime.md | 6 ++---- .../external-dictionaries/external-dicts-dict-polygon.md | 6 ++---- .../external-dictionaries/external-dicts-dict-sources.md | 6 ++---- .../external-dictionaries/external-dicts-dict-structure.md | 6 ++---- .../external-dictionaries/external-dicts-dict.md | 6 ++---- .../dictionaries/external-dictionaries/external-dicts.md | 6 ++---- 8 files changed, 18 insertions(+), 28 deletions(-) create mode 100644 docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md b/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md new file mode 100644 index 00000000000..e6a0dac7afb --- /dev/null +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md @@ -0,0 +1,4 @@ +:::tip +If you are using a dictionary with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. +Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). +::: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 4e379365912..aac0db208c6 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -3,6 +3,7 @@ slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-l sidebar_position: 41 sidebar_label: Storing Dictionaries in Memory --- +import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md'; # Storing Dictionaries in Memory @@ -24,10 +25,7 @@ ClickHouse generates an exception for errors with dictionaries. Examples of erro You can view the list of dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table. -:::tip -If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. -Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). -::: + The configuration looks like this: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index f5b70522841..e4edad4d9a1 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -3,6 +3,7 @@ slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-l sidebar_position: 42 sidebar_label: Dictionary Updates --- +import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md'; # Dictionary Updates @@ -12,10 +13,7 @@ Dictionary updates (other than loading for first use) do not block queries. 
Duri Example of settings: -:::tip -If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. -Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). -::: + ``` xml diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md index aa85247c1be..366d88e07c7 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -4,16 +4,14 @@ sidebar_position: 46 sidebar_label: Polygon Dictionaries With Grids title: "Polygon dictionaries" --- +import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md'; Polygon dictionaries allow you to efficiently search for the polygon containing specified points. For example: defining a city area by geographical coordinates. Example of a polygon dictionary configuration: -:::tip -If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. -Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). -::: + ``` xml diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index bcd5d4417a5..4eb96fe80a2 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -3,13 +3,11 @@ slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-s sidebar_position: 43 sidebar_label: Dictionary Sources --- +import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md'; # Dictionary Sources -:::tip -If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. -Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). -::: + A dictionary can be connected to ClickHouse from many different sources. diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index ba8411db1ce..881630167e3 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -3,13 +3,11 @@ slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-s sidebar_position: 44 sidebar_label: Dictionary Key and Fields --- +import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md'; # Dictionary Key and Fields -:::tip -If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. 
-Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). -::: + The `structure` clause describes the dictionary key and fields available for queries. diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md index 1d273a9e9ed..76ca3ac978f 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md @@ -3,13 +3,11 @@ slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict sidebar_position: 40 sidebar_label: Configuring a Dictionary --- +import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md'; # Configuring a Dictionary -:::tip -If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. -Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). -::: + If dictionary is configured using xml file, than dictionary configuration has the following structure: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 34d8b7b74a4..7498afd8492 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -3,13 +3,11 @@ slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts sidebar_position: 39 sidebar_label: General Description --- +import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md'; # Dictionaries -:::tip -If you are using a DICTIONARY with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. -Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). -::: + You can add your own dictionaries from various data sources. The source for a dictionary can be a ClickHouse table, a DDL query, a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”. 
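
(The tips rewritten in the commits above keep pointing ClickHouse Cloud users at the DDL route without showing it. A minimal sketch of that route follows, run as the `default` user; the table name, dictionary name, columns, layout and lifetime are placeholders chosen for illustration.)

```sql
-- Hypothetical source table the dictionary will read from.
CREATE TABLE countries
(
    id UInt64,
    name String
)
ENGINE = MergeTree
ORDER BY id;

-- DDL-created dictionary: no server configuration files are involved.
CREATE DICTIONARY countries_dict
(
    id UInt64,
    name String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'countries'))
LAYOUT(FLAT())
LIFETIME(MIN 0 MAX 300);

-- Behaves like a first-class entity: usable with dictGet or a plain SELECT.
SELECT dictGet('countries_dict', 'name', toUInt64(1));
SELECT * FROM countries_dict;
```
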
From 4778b5c13f7aeb2602047a790fcf640251b4c2b9 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Tue, 15 Nov 2022 20:23:49 +0000 Subject: [PATCH 59/80] Fix IS (NOT)NULL operator --- src/Parsers/ExpressionListParsers.cpp | 60 +++++++++---------- .../02477_is_null_parser.reference | 3 + .../0_stateless/02477_is_null_parser.sql | 3 + 3 files changed, 36 insertions(+), 30 deletions(-) create mode 100644 tests/queries/0_stateless/02477_is_null_parser.reference create mode 100644 tests/queries/0_stateless/02477_is_null_parser.sql diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index d29aa248ec4..29158254e88 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2199,40 +2199,40 @@ std::vector> ParserExpressionImpl::operators_t {"AND", Operator("and", 4, 2, OperatorType::Mergeable)}, {"BETWEEN", Operator("", 6, 0, OperatorType::StartBetween)}, {"NOT BETWEEN", Operator("", 6, 0, OperatorType::StartNotBetween)}, - {"IS NULL", Operator("isNull", 8, 1, OperatorType::IsNull)}, - {"IS NOT NULL", Operator("isNotNull", 8, 1, OperatorType::IsNull)}, - {"==", Operator("equals", 9, 2, OperatorType::Comparison)}, - {"!=", Operator("notEquals", 9, 2, OperatorType::Comparison)}, - {"<>", Operator("notEquals", 9, 2, OperatorType::Comparison)}, - {"<=", Operator("lessOrEquals", 9, 2, OperatorType::Comparison)}, - {">=", Operator("greaterOrEquals", 9, 2, OperatorType::Comparison)}, - {"<", Operator("less", 9, 2, OperatorType::Comparison)}, - {">", Operator("greater", 9, 2, OperatorType::Comparison)}, - {"=", Operator("equals", 9, 2, OperatorType::Comparison)}, - {"LIKE", Operator("like", 9, 2)}, - {"ILIKE", Operator("ilike", 9, 2)}, - {"NOT LIKE", Operator("notLike", 9, 2)}, - {"NOT ILIKE", Operator("notILike", 9, 2)}, - {"IN", Operator("in", 9, 2)}, - {"NOT IN", Operator("notIn", 9, 2)}, - {"GLOBAL IN", Operator("globalIn", 9, 2)}, - {"GLOBAL NOT IN", Operator("globalNotIn", 9, 2)}, - {"||", Operator("concat", 10, 2, OperatorType::Mergeable)}, - {"+", Operator("plus", 11, 2)}, - {"-", Operator("minus", 11, 2)}, - {"*", Operator("multiply", 12, 2)}, - {"/", Operator("divide", 12, 2)}, - {"%", Operator("modulo", 12, 2)}, - {"MOD", Operator("modulo", 12, 2)}, - {"DIV", Operator("intDiv", 12, 2)}, - {".", Operator("tupleElement", 14, 2, OperatorType::TupleElement)}, - {"[", Operator("arrayElement", 14, 2, OperatorType::ArrayElement)}, - {"::", Operator("CAST", 14, 2, OperatorType::Cast)}, + {"==", Operator("equals", 8, 2, OperatorType::Comparison)}, + {"!=", Operator("notEquals", 8, 2, OperatorType::Comparison)}, + {"<>", Operator("notEquals", 8, 2, OperatorType::Comparison)}, + {"<=", Operator("lessOrEquals", 8, 2, OperatorType::Comparison)}, + {">=", Operator("greaterOrEquals", 8, 2, OperatorType::Comparison)}, + {"<", Operator("less", 8, 2, OperatorType::Comparison)}, + {">", Operator("greater", 8, 2, OperatorType::Comparison)}, + {"=", Operator("equals", 8, 2, OperatorType::Comparison)}, + {"LIKE", Operator("like", 8, 2)}, + {"ILIKE", Operator("ilike", 8, 2)}, + {"NOT LIKE", Operator("notLike", 8, 2)}, + {"NOT ILIKE", Operator("notILike", 8, 2)}, + {"IN", Operator("in", 8, 2)}, + {"NOT IN", Operator("notIn", 8, 2)}, + {"GLOBAL IN", Operator("globalIn", 8, 2)}, + {"GLOBAL NOT IN", Operator("globalNotIn", 8, 2)}, + {"||", Operator("concat", 9, 2, OperatorType::Mergeable)}, + {"+", Operator("plus", 10, 2)}, + {"-", Operator("minus", 10, 2)}, + {"*", Operator("multiply", 11, 2)}, + {"/", Operator("divide", 11, 2)}, + {"%", 
Operator("modulo", 11, 2)}, + {"MOD", Operator("modulo", 11, 2)}, + {"DIV", Operator("intDiv", 11, 2)}, + {".", Operator("tupleElement", 13, 2, OperatorType::TupleElement)}, + {"[", Operator("arrayElement", 13, 2, OperatorType::ArrayElement)}, + {"::", Operator("CAST", 13, 2, OperatorType::Cast)}, + {"IS NULL", Operator("isNull", 13, 1, OperatorType::IsNull)}, + {"IS NOT NULL", Operator("isNotNull", 13, 1, OperatorType::IsNull)}, }); std::vector> ParserExpressionImpl::unary_operators_table({ {"NOT", Operator("not", 5, 1)}, - {"-", Operator("negate", 13, 1)} + {"-", Operator("negate", 12, 1)} }); Operator ParserExpressionImpl::finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween); diff --git a/tests/queries/0_stateless/02477_is_null_parser.reference b/tests/queries/0_stateless/02477_is_null_parser.reference new file mode 100644 index 00000000000..57d96862011 --- /dev/null +++ b/tests/queries/0_stateless/02477_is_null_parser.reference @@ -0,0 +1,3 @@ +SELECT (\'a\' IS NULL) + (\'b\' IS NOT NULL) +SELECT (\'a\' IS NULL) = 0 +SELECT CAST(1 IS NULL, \'Int32\') diff --git a/tests/queries/0_stateless/02477_is_null_parser.sql b/tests/queries/0_stateless/02477_is_null_parser.sql new file mode 100644 index 00000000000..b95a35fde21 --- /dev/null +++ b/tests/queries/0_stateless/02477_is_null_parser.sql @@ -0,0 +1,3 @@ +EXPLAIN SYNTAX SELECT 'a' IS NULL + 'b' IS NOT NULL; +EXPLAIN SYNTAX SELECT 'a' IS NULL = 0; +EXPLAIN SYNTAX SELECT 1 IS NULL :: Int32; From fcdc9dfaacfb1d473c142cad7303d9c12e170a1b Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 15 Nov 2022 20:52:18 +0000 Subject: [PATCH 60/80] Make test_global_overcommit_tracker non-parallel --- tests/integration/parallel_skip.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 3b4d1f2f29a..ba593b76bbf 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -48,6 +48,8 @@ "test_system_replicated_fetches/test.py::test_system_replicated_fetches", "test_zookeeper_config_load_balancing/test.py::test_round_robin", + "test_global_overcommit_tracker/test.py::test_global_overcommit", + "test_user_ip_restrictions/test.py::test_ipv4", "test_user_ip_restrictions/test.py::test_ipv6" ] From c60b98f57630015f577385640c102eb6d23cce72 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 15 Nov 2022 16:17:43 -0500 Subject: [PATCH 61/80] updates from review --- .../dictionaries/external-dictionaries/external-dicts.md | 8 ++++++-- docs/en/sql-reference/statements/create/dictionary.md | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 7498afd8492..06b5b8a6746 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -9,7 +9,7 @@ import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dict -You can add your own dictionaries from various data sources. The source for a dictionary can be a ClickHouse table, a DDL query, a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”. +You can add your own dictionaries from various data sources. 
The source for a dictionary can be a ClickHouse table, a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”. ClickHouse: @@ -29,7 +29,11 @@ The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tabl ## Creating a dictionary with a DDL query -Dictionaries can be created with [DDL queries](../../../sql-reference/statements/create/dictionary.md). This does not require any additional records in a server configuration file. This allows dictionaries to be worked with as first-class entities, like tables or views. +Dictionaries can be created with [DDL queries](../../../sql-reference/statements/create/dictionary.md), and this is the recommended method because with DDL created dictionaries: +- No additional records are added to server configuration files +- The dictionaries can be worked with as first-class entities, like tables or views +- Data can be read directly, using familiar SELECT rather than dictionary table functions +- The dictionaries can be easily renamed ## Creating a dictionary with a configuration file diff --git a/docs/en/sql-reference/statements/create/dictionary.md b/docs/en/sql-reference/statements/create/dictionary.md index 37051f8031a..a470b071971 100644 --- a/docs/en/sql-reference/statements/create/dictionary.md +++ b/docs/en/sql-reference/statements/create/dictionary.md @@ -130,4 +130,4 @@ Please see the details in [Dictionary sources](/docs/en/sql-reference/dictionari **See Also** - For more information, see the [Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section. -- [system.dictionaries](../../../operations/system-tables/dictionaries.md) — This table contains information about [dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). +- [system.dictionaries](../../../operations/system-tables/dictionaries.md) — This table contains information about [Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). From d9adf2f02d50a6c15229150516f90d94aa97a65f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Wed, 16 Nov 2022 09:34:04 +0800 Subject: [PATCH 62/80] fix doc --- docs/en/sql-reference/functions/math-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 47c27268b09..bcd118ce0be 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -555,7 +555,7 @@ Result: Computes the factorial of an integer value. It works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is UInt64. -The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20, a value of 21 or greater overflows the range for Int64 and will cause exception throw. +The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20, a value of 21 or greater will cause exception throw. 
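To make the documented behaviour easy to check, here is a small set of illustrative queries; they are examples only, restating the rules in the paragraph above, and are not part of the patch:

```sql
SELECT factorial(0);   -- 1
SELECT factorial(-3);  -- 1, any negative argument also returns 1
SELECT factorial(20);  -- 2432902008176640000, the largest accepted argument
SELECT factorial(21);  -- throws an exception
```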
**Syntax** From 6393d11dabe76d8e7f73a8fa5c6cc054f670a91b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Wed, 16 Nov 2022 09:34:45 +0800 Subject: [PATCH 63/80] fix doc --- src/Functions/factorial.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/factorial.cpp b/src/Functions/factorial.cpp index b76ef90a48d..4e96391bccd 100644 --- a/src/Functions/factorial.cpp +++ b/src/Functions/factorial.cpp @@ -103,7 +103,7 @@ REGISTER_FUNCTION(Factorial) R"( Computes the factorial of an integer value. It works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is UInt64. -The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20, a value of 21 or greater overflows the range for Int64 and will cause exception throw. +The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20, a value of 21 or greater will cause exception throw. )", Documentation::Examples{{"factorial", "SELECT factorial(10)"}}, Documentation::Categories{"Mathematical"}}, From 81971acf3551069aa87fceb757da146af24986ed Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 16 Nov 2022 02:32:44 +0000 Subject: [PATCH 64/80] Fix docs --- docs/en/engines/table-engines/log-family/index.md | 2 +- .../table-engines/mergetree-family/mergetree.md | 4 ++-- .../table-engines/mergetree-family/replication.md | 2 +- docs/en/engines/table-engines/special/join.md | 2 +- docs/en/operations/settings/index.md | 2 +- docs/en/operations/settings/settings.md | 14 +++++++------- docs/en/operations/system-tables/mutations.md | 4 ++-- docs/en/operations/system-tables/parts.md | 2 +- docs/en/operations/system-tables/parts_columns.md | 2 +- docs/en/sql-reference/statements/alter/column.md | 2 +- docs/en/sql-reference/statements/alter/delete.md | 6 +++--- .../en/sql-reference/statements/alter/partition.md | 4 ++-- .../sql-reference/statements/alter/projection.md | 6 +++--- .../statements/alter/skipping-index.md | 2 +- docs/en/sql-reference/statements/alter/update.md | 6 +++--- docs/ru/operations/settings/index.md | 2 +- docs/ru/operations/settings/settings.md | 2 +- docs/ru/sql-reference/statements/alter/column.md | 2 +- docs/ru/sql-reference/statements/insert-into.md | 2 +- docs/ru/sql-reference/statements/select/index.md | 2 +- docs/zh/sql-reference/statements/alter.md | 2 +- docs/zh/sql-reference/statements/insert-into.md | 2 +- 22 files changed, 37 insertions(+), 37 deletions(-) diff --git a/docs/en/engines/table-engines/log-family/index.md b/docs/en/engines/table-engines/log-family/index.md index 486c41c2496..21f857510f7 100644 --- a/docs/en/engines/table-engines/log-family/index.md +++ b/docs/en/engines/table-engines/log-family/index.md @@ -28,7 +28,7 @@ Engines: During `INSERT` queries, the table is locked, and other queries for reading and writing data both wait for the table to unlock. If there are no data writing queries, any number of data reading queries can be performed concurrently. -- Do not support [mutations](/docs/en/sql-reference/statements/alter/index.md/#alter-mutations). +- Do not support [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). - Do not support indexes. 
diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7dfb5a9fed7..ce6cec079a3 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -537,7 +537,7 @@ TTL time_column TTL time_column + interval ``` -To define `interval`, use [time interval](/docs/en/sql-reference/operators/index.md/#operators-datetime) operators, for example: +To define `interval`, use [time interval](/docs/en/sql-reference/operators/index.md#operators-datetime) operators, for example: ``` sql TTL date_time + INTERVAL 1 MONTH @@ -860,7 +860,7 @@ The number of threads performing background moves of data parts can be changed b In the case of `MergeTree` tables, data is getting to disk in different ways: - As a result of an insert (`INSERT` query). -- During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md/#alter-mutations). +- During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). - When downloading from another replica. - As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition). diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 67b595d0fa0..ead1a76992e 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -20,7 +20,7 @@ Replication works at the level of an individual table, not the entire server. A Replication does not depend on sharding. Each shard has its own independent replication. -Compressed data for `INSERT` and `ALTER` queries is replicated (for more information, see the documentation for [ALTER](/docs/en/sql-reference/statements/alter/index.md/#query_language_queries_alter)). +Compressed data for `INSERT` and `ALTER` queries is replicated (for more information, see the documentation for [ALTER](/docs/en/sql-reference/statements/alter/index.md#query_language_queries_alter)). `CREATE`, `DROP`, `ATTACH`, `DETACH` and `RENAME` queries are executed on a single server and are not replicated: diff --git a/docs/en/engines/table-engines/special/join.md b/docs/en/engines/table-engines/special/join.md index 0e51a8b7696..a49214bd00a 100644 --- a/docs/en/engines/table-engines/special/join.md +++ b/docs/en/engines/table-engines/special/join.md @@ -59,7 +59,7 @@ Main use-cases for `Join`-engine tables are following: ### Deleting Data {#deleting-data} -`ALTER DELETE` queries for `Join`-engine tables are implemented as [mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations). `DELETE` mutation reads filtered data and overwrites data of memory and disk. +`ALTER DELETE` queries for `Join`-engine tables are implemented as [mutations](/docs/en/sql-reference/statements/alter/index.md#mutations). `DELETE` mutation reads filtered data and overwrites data of memory and disk. 
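A minimal sketch of the deletion behaviour described above for `Join`-engine tables; the table name, key and values are hypothetical and only show the `ENGINE = Join(...)` plus `ALTER TABLE ... DELETE` combination:

```sql
CREATE TABLE id_val_join (`id` UInt32, `val` UInt8)
ENGINE = Join(ANY, LEFT, id);

INSERT INTO id_val_join VALUES (1, 11), (2, 12), (3, 13);

-- Executed as a mutation: the matching rows are rewritten out of memory and disk
ALTER TABLE id_val_join DELETE WHERE id = 3;
```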
### Limitations and Settings {#join-limitations-and-settings} diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index 8603257ea55..eee4058c230 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -26,7 +26,7 @@ Ways to configure settings, in order of priority: - When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`. - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`). - - Make settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select) clause of the SELECT query. The setting value is applied only to that query and is reset to default or previous value after the query is executed. + - Make settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of the SELECT query. The setting value is applied only to that query and is reset to default or previous value after the query is executed. Settings that can only be made in the server config file are not covered in this section. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 7abe4affbd1..a15a6e9bf4a 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -276,7 +276,7 @@ Default value: 0. Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md/#create-default-values) instead of [NULL](../../sql-reference/syntax.md/#null-literal) into columns with not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable) data type. If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting. -This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md/#insert_query_insert-select) queries. Note that `SELECT` subqueries may be concatenated with `UNION ALL` clause. +This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md/#inserting-the-results-of-select) queries. Note that `SELECT` subqueries may be concatenated with `UNION ALL` clause. Possible values: @@ -1619,8 +1619,8 @@ These functions can be transformed: - [length](../../sql-reference/functions/array-functions.md/#array_functions-length) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. - [empty](../../sql-reference/functions/array-functions.md/#function-empty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. - [notEmpty](../../sql-reference/functions/array-functions.md/#function-notempty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. -- [isNull](../../sql-reference/operators/index.md/#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. -- [isNotNull](../../sql-reference/operators/index.md/#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. +- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. +- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. 
- [count](../../sql-reference/aggregate-functions/reference/count.md) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. - [mapKeys](../../sql-reference/functions/tuple-map-functions.md/#mapkeys) to read the [keys](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn. - [mapValues](../../sql-reference/functions/tuple-map-functions.md/#mapvalues) to read the [values](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn. @@ -2041,7 +2041,7 @@ Default value: 16. ## validate_polygons {#validate_polygons} -Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md/#pointinpolygon) function, if the polygon is self-intersecting or self-tangent. +Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent. Possible values: @@ -2227,7 +2227,7 @@ Default value: `0`. ## mutations_sync {#mutations_sync} -Allows to execute `ALTER TABLE ... UPDATE|DELETE` queries ([mutations](../../sql-reference/statements/alter/index.md/#mutations)) synchronously. +Allows to execute `ALTER TABLE ... UPDATE|DELETE` queries ([mutations](../../sql-reference/statements/alter/index.md#mutations)) synchronously. Possible values: @@ -2239,8 +2239,8 @@ Default value: `0`. **See Also** -- [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md/#synchronicity-of-alter-queries) -- [Mutations](../../sql-reference/statements/alter/index.md/#mutations) +- [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) +- [Mutations](../../sql-reference/statements/alter/index.md#mutations) ## ttl_only_drop_parts {#ttl_only_drop_parts} diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md index 0d3b764846b..d8fb91a63f5 100644 --- a/docs/en/operations/system-tables/mutations.md +++ b/docs/en/operations/system-tables/mutations.md @@ -3,7 +3,7 @@ slug: /en/operations/system-tables/mutations --- # mutations -The table contains information about [mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations) of [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) tables and their progress. Each mutation command is represented by a single row. +The table contains information about [mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) of [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) tables and their progress. Each mutation command is represented by a single row. Columns: @@ -45,7 +45,7 @@ If there were problems with mutating some data parts, the following columns cont **See Also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations) +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) - [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine - [ReplicatedMergeTree](/docs/en/engines/table-engines/mergetree-family/replication.md) family diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index cbabd9b27b1..bbd5385f44b 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -9,7 +9,7 @@ Each row describes one data part. Columns: -- `partition` ([String](../../sql-reference/data-types/string.md)) – The partition name. 
To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md/#query_language_queries_alter) query. +- `partition` ([String](../../sql-reference/data-types/string.md)) – The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query. Formats: diff --git a/docs/en/operations/system-tables/parts_columns.md b/docs/en/operations/system-tables/parts_columns.md index d934e01f245..68757ddfbff 100644 --- a/docs/en/operations/system-tables/parts_columns.md +++ b/docs/en/operations/system-tables/parts_columns.md @@ -9,7 +9,7 @@ Each row describes one data part. Columns: -- `partition` ([String](../../sql-reference/data-types/string.md)) — The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md/#query_language_queries_alter) query. +- `partition` ([String](../../sql-reference/data-types/string.md)) — The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query. Formats: diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 6bca0dbff42..ae8671ffa9d 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -254,7 +254,7 @@ The `ALTER` query lets you create and delete separate elements (columns) in nest There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`). -If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#insert_query_insert-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. You can use the [clickhouse-copier](/docs/en/operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. +If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. You can use the [clickhouse-copier](/docs/en/operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running. 
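The workaround mentioned above (create a new table, copy with `INSERT SELECT`, swap with `RENAME`, drop the old table) can be sketched as follows; every table and column name here is hypothetical:

```sql
-- Existing table whose structure ALTER cannot change in the required way
CREATE TABLE visits (`id` UInt64, `url` String) ENGINE = MergeTree ORDER BY id;

-- 1. Create a table with the desired structure
CREATE TABLE visits_new (`id` UInt64, `url` String, `created` DateTime DEFAULT now())
ENGINE = MergeTree ORDER BY id;

-- 2. Copy the data
INSERT INTO visits_new (id, url) SELECT id, url FROM visits;

-- 3. Swap the tables and remove the old one
RENAME TABLE visits TO visits_old, visits_new TO visits;
DROP TABLE visits_old;
```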
diff --git a/docs/en/sql-reference/statements/alter/delete.md b/docs/en/sql-reference/statements/alter/delete.md index 4dcab030d13..30ed96c0b9c 100644 --- a/docs/en/sql-reference/statements/alter/delete.md +++ b/docs/en/sql-reference/statements/alter/delete.md @@ -10,7 +10,7 @@ sidebar_label: DELETE ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr ``` -Deletes data matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +Deletes data matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). :::note @@ -25,6 +25,6 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do **See also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations) -- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md/#synchronicity-of-alter-queries) +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) +- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) - [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 146c15e776e..a8cea63380c 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -270,7 +270,7 @@ ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd' ## UPDATE IN PARTITION -Manipulates data in the specifies partition matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +Manipulates data in the specifies partition matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). Syntax: @@ -290,7 +290,7 @@ ALTER TABLE mt UPDATE x = x + 1 IN PARTITION 2 WHERE p = 2; ## DELETE IN PARTITION -Deletes data in the specifies partition matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +Deletes data in the specifies partition matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). Syntax: diff --git a/docs/en/sql-reference/statements/alter/projection.md b/docs/en/sql-reference/statements/alter/projection.md index 99cb8fb8fd1..3f6f493aa89 100644 --- a/docs/en/sql-reference/statements/alter/projection.md +++ b/docs/en/sql-reference/statements/alter/projection.md @@ -138,15 +138,15 @@ The following operations with [projections](/docs/en/engines/table-engines/merge ## DROP PROJECTION -`ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +`ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). ## MATERIALIZE PROJECTION -`ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. 
Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +`ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). ## CLEAR PROJECTION -`ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +`ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). The commands `ADD`, `DROP` and `CLEAR` are lightweight in a sense that they only change metadata or remove files. diff --git a/docs/en/sql-reference/statements/alter/skipping-index.md b/docs/en/sql-reference/statements/alter/skipping-index.md index 2dadffc4527..037e4bc38c5 100644 --- a/docs/en/sql-reference/statements/alter/skipping-index.md +++ b/docs/en/sql-reference/statements/alter/skipping-index.md @@ -14,7 +14,7 @@ The following operations are available: - `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. -- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data. +- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data. The first two commands are lightweight in a sense that they only change metadata or remove files. diff --git a/docs/en/sql-reference/statements/alter/update.md b/docs/en/sql-reference/statements/alter/update.md index f40b72f7ab3..5d27c382982 100644 --- a/docs/en/sql-reference/statements/alter/update.md +++ b/docs/en/sql-reference/statements/alter/update.md @@ -10,7 +10,7 @@ sidebar_label: UPDATE ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] WHERE filter_expr ``` -Manipulates data matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +Manipulates data matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). :::note The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. 
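For concreteness, a hypothetical instance of the `ALTER TABLE ... UPDATE` syntax shown above; the table, columns and filter are invented, and the second statement only illustrates that the mutation can be awaited via the `mutations_sync` setting mentioned below:

```sql
CREATE TABLE hits (`id` UInt64, `user_agent` String, `is_bot` UInt8)
ENGINE = MergeTree ORDER BY id;

ALTER TABLE hits UPDATE is_bot = 1 WHERE user_agent LIKE '%bot%';

-- Wait for the mutation to finish on the current server before returning
ALTER TABLE hits UPDATE is_bot = 1 WHERE user_agent LIKE '%crawler%'
SETTINGS mutations_sync = 1;
```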
@@ -24,7 +24,7 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do **See also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations) -- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md/#synchronicity-of-alter-queries) +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) +- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) - [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting diff --git a/docs/ru/operations/settings/index.md b/docs/ru/operations/settings/index.md index 4e055405847..6806aea5135 100644 --- a/docs/ru/operations/settings/index.md +++ b/docs/ru/operations/settings/index.md @@ -24,7 +24,7 @@ slug: /ru/operations/settings/ - При запуске консольного клиента ClickHouse в не интерактивном режиме установите параметр запуска `--setting=value`. - При использовании HTTP API передавайте cgi-параметры (`URL?setting_1=value&setting_2=value...`). - - Укажите необходимые настройки в секции [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select) запроса SELECT. Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию. + - Укажите необходимые настройки в секции [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) запроса SELECT. Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию. Настройки, которые можно задать только в конфигурационном файле сервера, в разделе не рассматриваются. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index a070dbd5e10..58894611386 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -479,7 +479,7 @@ SELECT * FROM table_with_enum_column_for_tsv_insert; Включает или отключает вставку [значений по умолчанию](../../sql-reference/statements/create/table.md#create-default-values) вместо [NULL](../../sql-reference/syntax.md#null-literal) в столбцы, которые не позволяют [хранить NULL](../../sql-reference/data-types/nullable.md#data_type-nullable). Если столбец не позволяет хранить `NULL` и эта настройка отключена, то вставка `NULL` приведет к возникновению исключения. Если столбец позволяет хранить `NULL`, то значения `NULL` вставляются независимо от этой настройки. -Эта настройка используется для запросов [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select). При этом подзапросы `SELECT` могут объединяться с помощью `UNION ALL`. +Эта настройка используется для запросов [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select). При этом подзапросы `SELECT` могут объединяться с помощью `UNION ALL`. Возможные значения: diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index 11ec72596c4..a8ace213075 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -254,7 +254,7 @@ SELECT groupArray(x), groupArray(s) FROM tmp; Отсутствует возможность удалять столбцы, входящие в первичный ключ или ключ для сэмплирования (в общем, входящие в выражение `ENGINE`). 
Изменение типа у столбцов, входящих в первичный ключ возможно только в том случае, если это изменение не приводит к изменению данных (например, разрешено добавление значения в Enum или изменение типа с `DateTime` на `UInt32`). -Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#insert_query_insert-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md). +Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#inserting-the-results-of-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md). Запрос `ALTER` блокирует все чтения и записи для таблицы. То есть если на момент запроса `ALTER` выполнялся долгий `SELECT`, то запрос `ALTER` сначала дождётся его выполнения. И в это время все новые запросы к той же таблице будут ждать, пока завершится этот `ALTER`. diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 573b8d39926..4fa6ac4ce66 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -95,7 +95,7 @@ INSERT INTO t FORMAT TabSeparated Если в таблице объявлены [ограничения](../../sql-reference/statements/create/table.md#constraints), то их выполнимость будет проверена для каждой вставляемой строки. Если для хотя бы одной строки ограничения не будут выполнены, запрос будет остановлен. -### Вставка результатов `SELECT` {#insert_query_insert-select} +### Вставка результатов `SELECT` {#inserting-the-results-of-select} **Синтаксис** diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md index 4479e24000b..f360a09eb10 100644 --- a/docs/ru/sql-reference/statements/select/index.md +++ b/docs/ru/sql-reference/statements/select/index.md @@ -270,7 +270,7 @@ SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers; └─────────────────┴────────┘ ``` -## SETTINGS в запросе SELECT {#settings-in-select} +## SETTINGS в запросе SELECT {#settings-in-select-query} Вы можете задать значения необходимых настроек непосредственно в запросе `SELECT` в секции `SETTINGS`. Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию. 
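As a one-line illustration of the `SETTINGS` clause described in the paragraph above (an example query, not taken from the patch); the setting applies to this query only and reverts afterwards:

```sql
SELECT count() FROM numbers(1000000) SETTINGS max_threads = 1;
```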
diff --git a/docs/zh/sql-reference/statements/alter.md b/docs/zh/sql-reference/statements/alter.md index 23edfd633db..fd73be4fd93 100644 --- a/docs/zh/sql-reference/statements/alter.md +++ b/docs/zh/sql-reference/statements/alter.md @@ -150,7 +150,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) 不支持对primary key或者sampling key中的列(在 `ENGINE` 表达式中用到的列)进行删除操作。改变包含在primary key中的列的类型时,如果操作不会导致数据的变化(例如,往Enum中添加一个值,或者将`DateTime` 类型改成 `UInt32`),那么这种操作是可行的。 -如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。你可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`。 +如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。你可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`。 `ALTER` 操作会阻塞对表的所有读写操作。换句话说,当一个大的 `SELECT` 语句和 `ALTER`同时执行时,`ALTER`会等待,直到 `SELECT` 执行结束。与此同时,当 `ALTER` 运行时,新的 sql 语句将会等待。 diff --git a/docs/zh/sql-reference/statements/insert-into.md b/docs/zh/sql-reference/statements/insert-into.md index 69762bf43bc..f199329829c 100644 --- a/docs/zh/sql-reference/statements/insert-into.md +++ b/docs/zh/sql-reference/statements/insert-into.md @@ -90,7 +90,7 @@ INSERT INTO t FORMAT TabSeparated 如果表中有一些[限制](../../sql-reference/statements/create/table.mdx#constraints),,数据插入时会逐行进行数据校验,如果这里面包含了不符合限制条件的数据,服务将会抛出包含限制信息的异常,这个语句也会被停止执行。 -### 使用`SELECT`的结果写入 {#insert_query_insert-select} +### 使用`SELECT`的结果写入 {#inserting-the-results-of-select} ``` sql INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... From ce8066e90e5c708899f05b2f0544dde4bd324718 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 16 Nov 2022 07:31:24 +0100 Subject: [PATCH 65/80] Add changelog for 21.11 --- CHANGELOG.md | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68767612892..ac1f0793c04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v22.11, 2022-11-17](#2211)**
**[ClickHouse release v22.10, 2022-10-26](#2210)**
**[ClickHouse release v22.9, 2022-09-22](#229)**
**[ClickHouse release v22.8-lts, 2022-08-18](#228)**
@@ -11,6 +12,108 @@ **[ClickHouse release v22.1, 2022-01-18](#221)**
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**
+###
ClickHouse release 22.11, 2022-11-17 + +#### Backward Incompatible Change +* `JSONExtract` family of functions will now attempt to coerce to the requested type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)). + +#### New Feature +* Support for retries during INSERTs into ReplicatedMergeTree if a session with ClickHouse Keeper is lost. Apart from fault tolerance, it aims to provide better user experience, - avoid returning a user an error during insert if keeper is restarted (for example, due to upgrade). [#42607](https://github.com/ClickHouse/ClickHouse/pull/42607) ([Igor Nikonov](https://github.com/devcrafter)). +* Add `Hudi` and `DeltaLake` table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do), [Kseniia Sumarokova](https://github.com/kssenii)). +* Add table function `hudi` and `deltaLake`. [#43080](https://github.com/ClickHouse/ClickHouse/pull/43080) ([flynn](https://github.com/ucasfl)). +* Support for composite time intervals. 1. Add, subtract and negate operations are now available on Intervals. In case when the types of Intervals are different they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added `**` glob support for recursive directory traversal to filesystem and S3. resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Introduce `s3_plain` disk type for write-once-read-many operations. Implement `ATTACH` of `MergeTree` table for `s3_plain` disk. [#42628](https://github.com/ClickHouse/ClickHouse/pull/42628) ([Azat Khuzhin](https://github.com/azat)). +* Added applied row-level policies to `system.query_log`. [#39819](https://github.com/ClickHouse/ClickHouse/pull/39819) ([Vladimir Chebotaryov](https://github.com/quickhouse)). +* Add four-letter command `csnp` for manually creating snapshots in ClickHouse Keeper. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)). +* Add function `ascii` like in spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)). +* Add function `pmod` which return non-negative result based on modulo. [#42755](https://github.com/ClickHouse/ClickHouse/pull/42755) ([李扬](https://github.com/taiyang-li)). +* Add function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)). +* Add function `randCanonical`, which is similar to the `rand` function in spark or impala. The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). [#43124](https://github.com/ClickHouse/ClickHouse/pull/43124) ([李扬](https://github.com/taiyang-li)). 
+* Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)). +* Add `min_age_to_force_merge_on_partition_only` setting to optimize old parts for the entire partition only. [#42659](https://github.com/ClickHouse/ClickHouse/pull/42659) ([Antonio Andelic](https://github.com/antonio2368)). +* Add generic implementation for arbitrary structured named collections, access type and `system.named_collections`. [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Performance Improvement +* `match` function can use the index if it's a condition on string prefix. This closes [#37333](https://github.com/ClickHouse/ClickHouse/issues/37333). [#42458](https://github.com/ClickHouse/ClickHouse/pull/42458) ([clarkcaoliu](https://github.com/Clark0)). +* Speed up AND and OR operators when they are sequenced. [#42214](https://github.com/ClickHouse/ClickHouse/pull/42214) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Support parallel parsing for `LineAsString` input format. This improves performance just slightly. This closes [#42502](https://github.com/ClickHouse/ClickHouse/issues/42502). [#42780](https://github.com/ClickHouse/ClickHouse/pull/42780) ([Kruglov Pavel](https://github.com/Avogar)). +* ClickHouse Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)). +* A condition like `NOT LIKE 'prefix%'` can use primary index. [#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)). + +#### Experimental Feature +* Support type `Object` inside other types, e.g. `Array(JSON)`. [#36969](https://github.com/ClickHouse/ClickHouse/pull/36969) ([Anton Popov](https://github.com/CurtizJ)). +* Ignore MySQL binlog SAVEPOINT event for MaterializedMySQL. [#42931](https://github.com/ClickHouse/ClickHouse/pull/42931) ([zzsmdfj](https://github.com/zzsmdfj)). Handle (ignore) SAVEPOINT queries in MaterializedMySQL. [#43086](https://github.com/ClickHouse/ClickHouse/pull/43086) ([Stig Bakken](https://github.com/stigsb)). + +#### Improvement +* Trivial queries with small LIMIT will properly determine the number of estimated rows to read, so the threshold will be checked properly. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)). +* Add support for interactive parameters in INSERT VALUES queries. [#43077](https://github.com/ClickHouse/ClickHouse/pull/43077) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added new field `allow_readonly` in `system.table_functions` to allow using table functions in readonly mode resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. 
[#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* The `system.asynchronous_metrics` gets embedded documentation. This documentation is also exported to Prometheus. Fixed an error with the metrics about `cache` disks - they were calculated only for one arbitrary cache disk instead all of them. This closes [#7644](https://github.com/ClickHouse/ClickHouse/issues/7644). [#43194](https://github.com/ClickHouse/ClickHouse/pull/43194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Throttling algorithm changed to token bucket. [#42665](https://github.com/ClickHouse/ClickHouse/pull/42665) ([Sergei Trifonov](https://github.com/serxa)). +* Mask passwords and secret keys both in `system.query_log` and `/var/log/clickhouse-server/*.log` and also in error messages. [#42484](https://github.com/ClickHouse/ClickHouse/pull/42484) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove covered parts for fetched part (to avoid possible replication delay grows). [#39737](https://github.com/ClickHouse/ClickHouse/pull/39737) ([Azat Khuzhin](https://github.com/azat)). +* If `/dev/tty` is available, the progress in clickhouse-client and clickhouse-local will be rendered directly to the terminal, without writing to stderr. It allows to get progress even if stderr is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for `FixedString` input to base64 coding functions. [#42285](https://github.com/ClickHouse/ClickHouse/pull/42285) ([ltrk2](https://github.com/ltrk2)). +* Add columns `bytes_on_disk` and `path` to `system.detached_parts`. Closes [#42264](https://github.com/ClickHouse/ClickHouse/issues/42264). [#42303](https://github.com/ClickHouse/ClickHouse/pull/42303) ([chen](https://github.com/xiedeyantu)). +* Improve using structure from insertion table in table functions, now setting `use_structure_from_insertion_table_in_table_functions` has new possible value - `2` that means that ClickHouse will try to determine if we can use structure from insertion table or not automatically. Closes [#40028](https://github.com/ClickHouse/ClickHouse/issues/40028). [#42320](https://github.com/ClickHouse/ClickHouse/pull/42320) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix no progress indication on INSERT FROM INFILE. Closes [#42548](https://github.com/ClickHouse/ClickHouse/issues/42548). [#42634](https://github.com/ClickHouse/ClickHouse/pull/42634) ([chen](https://github.com/xiedeyantu)). +* Refactor function `tokens` to enable max tokens returned for related functions (disabled by default). [#42673](https://github.com/ClickHouse/ClickHouse/pull/42673) ([李扬](https://github.com/taiyang-li)). +* Allow to use `Date32` arguments for `formatDateTime` and `FROM_UNIXTIME` functions. [#42737](https://github.com/ClickHouse/ClickHouse/pull/42737) ([Roman Vasin](https://github.com/rvasin)). +* Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. 
[#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `FailedAsyncInsertQuery` event metric for async inserts. [#42814](https://github.com/ClickHouse/ClickHouse/pull/42814) ([Krzysztof Góralski](https://github.com/kgoralski)). +* Implement `read-in-order` optimization on top of query plan. It is enabled by default. Set `query_plan_read_in_order = 0` to use previous AST-based version. [#42829](https://github.com/ClickHouse/ClickHouse/pull/42829) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Increase the size of upload part exponentially for backup to S3 to avoid errors about max 10 000 parts limit of the multipart upload to s3. [#42833](https://github.com/ClickHouse/ClickHouse/pull/42833) ([Vitaly Baranov](https://github.com/vitlibar)). +* When the merge task is continuously busy and the disk space is insufficient, the completely expired parts cannot be selected and dropped, resulting in insufficient disk space. My idea is that when the entire Part expires, there is no need for additional disk space to guarantee, ensure the normal execution of TTL. [#42869](https://github.com/ClickHouse/ClickHouse/pull/42869) ([zhongyuankai](https://github.com/zhongyuankai)). +* Add `oss` function and `OSS` table engine (this is convenient for users). oss is fully compatible with s3. [#43155](https://github.com/ClickHouse/ClickHouse/pull/43155) ([zzsmdfj](https://github.com/zzsmdfj)). +* Improve error reporting in the collection of OS-related info for the `system.asynchronous_metrics` table. [#43192](https://github.com/ClickHouse/ClickHouse/pull/43192) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Modify the `INFORMATION_SCHEMA` tables in a way so that now ClickHouse can connect to itself using the MySQL compatibility protocol. Add columns instead of aliases (related to [#9769](https://github.com/ClickHouse/ClickHouse/issues/9769)). It will improve the compatibility with various MySQL clients. [#43198](https://github.com/ClickHouse/ClickHouse/pull/43198) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Add some functions for compatibility with PowerBI, when it connects using MySQL protocol [#42612](https://github.com/ClickHouse/ClickHouse/pull/42612) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Better usability for Dashboard on changes [#42872](https://github.com/ClickHouse/ClickHouse/pull/42872) ([Vladimir C](https://github.com/vdimir)). + +#### Build/Testing/Packaging Improvement +* Run SQLancer for each pull request and commit to master. [SQLancer](https://github.com/sqlancer/sqlancer) is an OpenSource fuzzer that focuses on automatic detection of logical bugs. [#42397](https://github.com/ClickHouse/ClickHouse/pull/42397) ([Ilya Yatsishin](https://github.com/qoega)). +* Update to latest zlib-ng. [#42463](https://github.com/ClickHouse/ClickHouse/pull/42463) ([Boris Kuschel](https://github.com/bkuschel)). +* Add support for testing ClickHouse server with Jepsen. By the way, we already have support for testing ClickHouse Keeper with Jepsen. This pull request extends it to Replicated tables. [#42619](https://github.com/ClickHouse/ClickHouse/pull/42619) ([Antonio Andelic](https://github.com/antonio2368)). +* Use https://github.com/matus-chochlik/ctcache for clang-tidy results caching. [#42913](https://github.com/ClickHouse/ClickHouse/pull/42913) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. 
The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove some libraries from Ubuntu Docker image. [#42622](https://github.com/ClickHouse/ClickHouse/pull/42622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Updated normaliser to clone the alias ast. resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix race for backup of tables in `Lazy` databases. [#43104](https://github.com/ClickHouse/ClickHouse/pull/43104) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix for `skip_unavailable_shards`: it did not work with the `s3Cluster` table function. [#43131](https://github.com/ClickHouse/ClickHouse/pull/43131) ([chen](https://github.com/xiedeyantu)). +* Fix schema inference in `s3Cluster` and improvement in `hdfsCluster`. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix retries while reading from URL table engines / table function. (retrtiable errors could be retries more times than needed, non-retrialble errors resulted in failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)). +* A segmentation fault related to DNS & c-ares has been reported and fixed. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix `LOGICAL_ERROR` `Arguments of 'plus' have incorrect data types` which may happen in PK analysis (monotonicity check). Fix invalid PK analysis for monotonic binary functions with first constant argument. [#42410](https://github.com/ClickHouse/ClickHouse/pull/42410) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect key analysis when key types cannot be inside Nullable. This fixes [#42456](https://github.com/ClickHouse/ClickHouse/issues/42456). [#42469](https://github.com/ClickHouse/ClickHouse/pull/42469) ([Amos Bird](https://github.com/amosbird)). +* Fix typo in setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). [#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix creating a Set with wrong header when data type is LowCardinality. Closes [#42460](https://github.com/ClickHouse/ClickHouse/issues/42460). [#42579](https://github.com/ClickHouse/ClickHouse/pull/42579) ([flynn](https://github.com/ucasfl)). +* `(U)Int128` and `(U)Int256` values were correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix a bug in functions parser that could have led to a segmentation fault. 
[#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix the locking in `truncate table`. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)). +* Fix possible crash in `web` disks when file does not exist (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). +* Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)). +* Fix stack-use-after-return under ASAN build in the Create User query parser. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix `lowerUTF8`/`upperUTF8` in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). +* Additional bound check was added to lz4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)). +* Fix rare possible hung on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect behavior with multiple disjuncts in hash join, close [#42832](https://github.com/ClickHouse/ClickHouse/issues/42832). [#42876](https://github.com/ClickHouse/ClickHouse/pull/42876) ([Vladimir C](https://github.com/vdimir)). +* A null pointer will be generated when select if as from ‘three table join’ , For example, this SQL query: [#42883](https://github.com/ClickHouse/ClickHouse/pull/42883) ([zzsmdfj](https://github.com/zzsmdfj)). +* Fix memory sanitizer report in Cluster Discovery, close [#42763](https://github.com/ClickHouse/ClickHouse/issues/42763). [#42905](https://github.com/ClickHouse/ClickHouse/pull/42905) ([Vladimir C](https://github.com/vdimir)). +* Improve datetime schema inference in case of empty string. [#42911](https://github.com/ClickHouse/ClickHouse/pull/42911) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). +* Fix ATTACH TABLE in `PostgreSQL` database engine if the table contains DATETIME data type. Closes [#42817](https://github.com/ClickHouse/ClickHouse/issues/42817). [#42960](https://github.com/ClickHouse/ClickHouse/pull/42960) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix lambda parsing. Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix incorrect key analysis when nullable keys appear in the middle of a hyperrectangle. This fixes [#43111](https://github.com/ClickHouse/ClickHouse/issues/43111) . [#43133](https://github.com/ClickHouse/ClickHouse/pull/43133) ([Amos Bird](https://github.com/amosbird)). 
+* Fix several buffer over-reads in deserialization of carefully crafted aggregate function states. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). +* Fix function `if` in case of NULL and const Nullable arguments. Closes [#43069](https://github.com/ClickHouse/ClickHouse/issues/43069). [#43178](https://github.com/ClickHouse/ClickHouse/pull/43178) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix decimal math overflow in parsing datetime with the 'best effort' algorithm. Closes [#43061](https://github.com/ClickHouse/ClickHouse/issues/43061). [#43180](https://github.com/ClickHouse/ClickHouse/pull/43180) ([Kruglov Pavel](https://github.com/Avogar)). +* The `indent` field produced by the `git-import` tool was miscalculated. See https://clickhouse.com/docs/en/getting-started/example-datasets/github/. [#43191](https://github.com/ClickHouse/ClickHouse/pull/43191) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed unexpected behaviour of `Interval` types with subquery and casting. [#43193](https://github.com/ClickHouse/ClickHouse/pull/43193) ([jh0x](https://github.com/jh0x)). + ### ClickHouse release 22.10, 2022-10-26 #### Backward Incompatible Change From 981e6e2c24b96ef3e3516273d04864658fda1d72 Mon Sep 17 00:00:00 2001 From: Vladimir C Date: Wed, 16 Nov 2022 12:44:54 +0100 Subject: [PATCH 66/80] Update src/Analyzer/Passes/QueryAnalysisPass.cpp --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 760d036ab4f..2b76376c4c9 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2177,8 +2177,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier } else { - alias_name_to_node_map.erase(it); - std::unordered_set valid_identifiers; collectScopeWithParentScopesValidIdentifiersForTypoCorrection(identifier, scope, true, false, false, valid_identifiers); From 2d265523bbaf5dd1aff1f60ddae52640098da5cf Mon Sep 17 00:00:00 2001 From: Vladimir C Date: Wed, 16 Nov 2022 12:45:54 +0100 Subject: [PATCH 67/80] Update src/Analyzer/Passes/QueryAnalysisPass.cpp --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 2b76376c4c9..138ff721f99 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2171,11 +2171,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier auto & alias_identifier_node = it->second->as(); auto identifier = alias_identifier_node.getIdentifier(); auto lookup_result = tryResolveIdentifier(IdentifierLookup{identifier, identifier_lookup.lookup_context}, scope, identifier_resolve_settings); - if (lookup_result.isResolved()) - { - it->second = lookup_result.resolved_identifier; - } - else + if (!lookup_result.isResolved()) { std::unordered_set valid_identifiers; collectScopeWithParentScopesValidIdentifiersForTypoCorrection(identifier, scope, true, false, false, valid_identifiers); @@ -2187,6 +2183,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier scope.scope_node->formatASTForErrorMessage(), getHintsErrorMessageSuffix(hints)); } + it->second = lookup_result.resolved_identifier; /** During collection of aliases if node is identifier and has 
alias, we cannot say if it is * column or function node. Check QueryExpressionsAliasVisitor documentation for clarification. From b13a8d478bb9c1d1e9f333c26acb6b33a22c6d9b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 16 Nov 2022 11:53:59 +0100 Subject: [PATCH 68/80] check limits for an AST in select parser fuzzer --- src/Parsers/fuzzers/select_parser_fuzzer.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Parsers/fuzzers/select_parser_fuzzer.cpp b/src/Parsers/fuzzers/select_parser_fuzzer.cpp index caa6c586cd6..3f712834c55 100644 --- a/src/Parsers/fuzzers/select_parser_fuzzer.cpp +++ b/src/Parsers/fuzzers/select_parser_fuzzer.cpp @@ -12,7 +12,15 @@ try std::string input = std::string(reinterpret_cast(data), size); DB::ParserQueryWithOutput parser(input.data() + input.size()); - DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 1000); + + const UInt64 max_parser_depth = 1000; + DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, max_parser_depth); + + const UInt64 max_ast_depth = 1000; + ast->checkDepth(max_ast_depth); + + const UInt64 max_ast_elements = 50000; + ast->checkSize(max_ast_elements); DB::WriteBufferFromOwnString wb; DB::formatAST(*ast, wb); From dcb76f6bbf1db37b15da40bab450760e7115bcea Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 16 Nov 2022 14:16:42 +0100 Subject: [PATCH 69/80] Allow autoremoval of old parts if detach_not_byte_identical_parts enabled Allow autoremoval of old & detached parts if detach_not_byte_identical_parts enabled. See also #28708 #37975 --- src/Storages/MergeTree/MergeTreePartInfo.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreePartInfo.h b/src/Storages/MergeTree/MergeTreePartInfo.h index c19cc55e74e..60c7e4e8822 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/src/Storages/MergeTree/MergeTreePartInfo.h @@ -154,6 +154,8 @@ struct DetachedPartInfo : public MergeTreePartInfo "deleting", "tmp-fetch", "covered-by-broken", + "merge-not-byte-identical", + "mutate-not-byte-identical" }); static constexpr auto DETACHED_REASONS_REMOVABLE_BY_TIMEOUT = std::to_array({ @@ -163,7 +165,9 @@ struct DetachedPartInfo : public MergeTreePartInfo "ignored", "broken-on-start", "deleting", - "clone" + "clone", + "merge-not-byte-identical", + "mutate-not-byte-identical" }); /// NOTE: It may parse part info incorrectly. From ea8c7df296deeac1c5f65416d03c73f2b044089f Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 16 Nov 2022 09:32:13 -0500 Subject: [PATCH 70/80] edits --- CHANGELOG.md | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac1f0793c04..051bde44dd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,18 +18,18 @@ * `JSONExtract` family of functions will now attempt to coerce to the requested type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)). #### New Feature -* Support for retries during INSERTs into ReplicatedMergeTree if a session with ClickHouse Keeper is lost. Apart from fault tolerance, it aims to provide better user experience, - avoid returning a user an error during insert if keeper is restarted (for example, due to upgrade). [#42607](https://github.com/ClickHouse/ClickHouse/pull/42607) ([Igor Nikonov](https://github.com/devcrafter)). 
+* Adds support for retries during INSERTs into ReplicatedMergeTree when a session with ClickHouse Keeper is lost. Apart from fault tolerance, it aims to provide better user experience, - avoid returning a user an error during insert if keeper is restarted (for example, due to upgrade). [#42607](https://github.com/ClickHouse/ClickHouse/pull/42607) ([Igor Nikonov](https://github.com/devcrafter)). * Add `Hudi` and `DeltaLake` table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do), [Kseniia Sumarokova](https://github.com/kssenii)). * Add table function `hudi` and `deltaLake`. [#43080](https://github.com/ClickHouse/ClickHouse/pull/43080) ([flynn](https://github.com/ucasfl)). -* Support for composite time intervals. 1. Add, subtract and negate operations are now available on Intervals. In case when the types of Intervals are different they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)). -* Added `**` glob support for recursive directory traversal to filesystem and S3. resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Support for composite time intervals. 1. Add, subtract and negate operations are now available on Intervals. In the case where the types of Intervals are different, they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added `**` glob support for recursive directory traversal of the filesystem and S3. Resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). * Introduce `s3_plain` disk type for write-once-read-many operations. Implement `ATTACH` of `MergeTree` table for `s3_plain` disk. [#42628](https://github.com/ClickHouse/ClickHouse/pull/42628) ([Azat Khuzhin](https://github.com/azat)). * Added applied row-level policies to `system.query_log`. [#39819](https://github.com/ClickHouse/ClickHouse/pull/39819) ([Vladimir Chebotaryov](https://github.com/quickhouse)). * Add four-letter command `csnp` for manually creating snapshots in ClickHouse Keeper. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)). -* Add function `ascii` like in spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)). -* Add function `pmod` which return non-negative result based on modulo. [#42755](https://github.com/ClickHouse/ClickHouse/pull/42755) ([李扬](https://github.com/taiyang-li)). 
+* Add function `ascii` like in Apache Spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)). +* Add function `pmod` which returns non-negative result based on modulo. [#42755](https://github.com/ClickHouse/ClickHouse/pull/42755) ([李扬](https://github.com/taiyang-li)). * Add function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)). -* Add function `randCanonical`, which is similar to the `rand` function in spark or impala. The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). [#43124](https://github.com/ClickHouse/ClickHouse/pull/43124) ([李扬](https://github.com/taiyang-li)). +* Add function `randCanonical`, which is similar to the `rand` function in Apache Spark or Impala. The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). [#43124](https://github.com/ClickHouse/ClickHouse/pull/43124) ([李扬](https://github.com/taiyang-li)). * Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)). * Add `min_age_to_force_merge_on_partition_only` setting to optimize old parts for the entire partition only. [#42659](https://github.com/ClickHouse/ClickHouse/pull/42659) ([Antonio Andelic](https://github.com/antonio2368)). * Add generic implementation for arbitrary structured named collections, access type and `system.named_collections`. [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)). @@ -39,21 +39,21 @@ * Speed up AND and OR operators when they are sequenced. [#42214](https://github.com/ClickHouse/ClickHouse/pull/42214) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). * Support parallel parsing for `LineAsString` input format. This improves performance just slightly. This closes [#42502](https://github.com/ClickHouse/ClickHouse/issues/42502). [#42780](https://github.com/ClickHouse/ClickHouse/pull/42780) ([Kruglov Pavel](https://github.com/Avogar)). * ClickHouse Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)). -* A condition like `NOT LIKE 'prefix%'` can use primary index. [#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)). +* A condition like `NOT LIKE 'prefix%'` can use the primary index. [#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)). #### Experimental Feature * Support type `Object` inside other types, e.g. `Array(JSON)`. [#36969](https://github.com/ClickHouse/ClickHouse/pull/36969) ([Anton Popov](https://github.com/CurtizJ)). * Ignore MySQL binlog SAVEPOINT event for MaterializedMySQL. [#42931](https://github.com/ClickHouse/ClickHouse/pull/42931) ([zzsmdfj](https://github.com/zzsmdfj)). Handle (ignore) SAVEPOINT queries in MaterializedMySQL. [#43086](https://github.com/ClickHouse/ClickHouse/pull/43086) ([Stig Bakken](https://github.com/stigsb)). 
#### Improvement -* Trivial queries with small LIMIT will properly determine the number of estimated rows to read, so the threshold will be checked properly. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)). +* Trivial queries with small LIMIT will properly determine the number of estimated rows to read, so that the threshold will be checked properly. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)). * Add support for interactive parameters in INSERT VALUES queries. [#43077](https://github.com/ClickHouse/ClickHouse/pull/43077) ([Nikolay Degterinsky](https://github.com/evillique)). -* Added new field `allow_readonly` in `system.table_functions` to allow using table functions in readonly mode resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. [#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Added new field `allow_readonly` in `system.table_functions` to allow using table functions in readonly mode. Resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. [#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). * The `system.asynchronous_metrics` gets embedded documentation. This documentation is also exported to Prometheus. Fixed an error with the metrics about `cache` disks - they were calculated only for one arbitrary cache disk instead all of them. This closes [#7644](https://github.com/ClickHouse/ClickHouse/issues/7644). [#43194](https://github.com/ClickHouse/ClickHouse/pull/43194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Throttling algorithm changed to token bucket. [#42665](https://github.com/ClickHouse/ClickHouse/pull/42665) ([Sergei Trifonov](https://github.com/serxa)). * Mask passwords and secret keys both in `system.query_log` and `/var/log/clickhouse-server/*.log` and also in error messages. [#42484](https://github.com/ClickHouse/ClickHouse/pull/42484) ([Vitaly Baranov](https://github.com/vitlibar)). * Remove covered parts for fetched part (to avoid possible replication delay grows). [#39737](https://github.com/ClickHouse/ClickHouse/pull/39737) ([Azat Khuzhin](https://github.com/azat)). -* If `/dev/tty` is available, the progress in clickhouse-client and clickhouse-local will be rendered directly to the terminal, without writing to stderr. It allows to get progress even if stderr is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. 
This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* If `/dev/tty` is available, the progress in clickhouse-client and clickhouse-local will be rendered directly to the terminal, without writing to STDERR. It allows getting progress even if STDERR is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Add support for `FixedString` input to base64 coding functions. [#42285](https://github.com/ClickHouse/ClickHouse/pull/42285) ([ltrk2](https://github.com/ltrk2)). * Add columns `bytes_on_disk` and `path` to `system.detached_parts`. Closes [#42264](https://github.com/ClickHouse/ClickHouse/issues/42264). [#42303](https://github.com/ClickHouse/ClickHouse/pull/42303) ([chen](https://github.com/xiedeyantu)). * Improve using structure from insertion table in table functions, now setting `use_structure_from_insertion_table_in_table_functions` has new possible value - `2` that means that ClickHouse will try to determine if we can use structure from insertion table or not automatically. Closes [#40028](https://github.com/ClickHouse/ClickHouse/issues/40028). [#42320](https://github.com/ClickHouse/ClickHouse/pull/42320) ([Kruglov Pavel](https://github.com/Avogar)). @@ -67,7 +67,7 @@ * When the merge task is continuously busy and the disk space is insufficient, the completely expired parts cannot be selected and dropped, resulting in insufficient disk space. My idea is that when the entire Part expires, there is no need for additional disk space to guarantee, ensure the normal execution of TTL. [#42869](https://github.com/ClickHouse/ClickHouse/pull/42869) ([zhongyuankai](https://github.com/zhongyuankai)). * Add `oss` function and `OSS` table engine (this is convenient for users). oss is fully compatible with s3. [#43155](https://github.com/ClickHouse/ClickHouse/pull/43155) ([zzsmdfj](https://github.com/zzsmdfj)). * Improve error reporting in the collection of OS-related info for the `system.asynchronous_metrics` table. [#43192](https://github.com/ClickHouse/ClickHouse/pull/43192) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Modify the `INFORMATION_SCHEMA` tables in a way so that now ClickHouse can connect to itself using the MySQL compatibility protocol. Add columns instead of aliases (related to [#9769](https://github.com/ClickHouse/ClickHouse/issues/9769)). It will improve the compatibility with various MySQL clients. [#43198](https://github.com/ClickHouse/ClickHouse/pull/43198) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Modify the `INFORMATION_SCHEMA` tables in a way so that ClickHouse can connect to itself using the MySQL compatibility protocol. Add columns instead of aliases (related to [#9769](https://github.com/ClickHouse/ClickHouse/issues/9769)). It will improve the compatibility with various MySQL clients. [#43198](https://github.com/ClickHouse/ClickHouse/pull/43198) ([Filatenkov Artur](https://github.com/FArthur-cmd)). * Add some functions for compatibility with PowerBI, when it connects using MySQL protocol [#42612](https://github.com/ClickHouse/ClickHouse/pull/42612) ([Filatenkov Artur](https://github.com/FArthur-cmd)). 
* Better usability for Dashboard on changes [#42872](https://github.com/ClickHouse/ClickHouse/pull/42872) ([Vladimir C](https://github.com/vdimir)). @@ -81,15 +81,15 @@ #### Bug Fix (user-visible misbehavior in official stable or prestable release) -* Updated normaliser to clone the alias ast. resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Updated normaliser to clone the alias ast. Resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). * Fix race for backup of tables in `Lazy` databases. [#43104](https://github.com/ClickHouse/ClickHouse/pull/43104) ([Vitaly Baranov](https://github.com/vitlibar)). * Fix for `skip_unavailable_shards`: it did not work with the `s3Cluster` table function. [#43131](https://github.com/ClickHouse/ClickHouse/pull/43131) ([chen](https://github.com/xiedeyantu)). * Fix schema inference in `s3Cluster` and improvement in `hdfsCluster`. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix retries while reading from URL table engines / table function. (retrtiable errors could be retries more times than needed, non-retrialble errors resulted in failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix retries while reading from URL table engines / table function. (retriable errors could be retries more times than needed, non-retriable errors resulted in failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)). * A segmentation fault related to DNS & c-ares has been reported and fixed. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). * Fix `LOGICAL_ERROR` `Arguments of 'plus' have incorrect data types` which may happen in PK analysis (monotonicity check). Fix invalid PK analysis for monotonic binary functions with first constant argument. [#42410](https://github.com/ClickHouse/ClickHouse/pull/42410) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fix incorrect key analysis when key types cannot be inside Nullable. This fixes [#42456](https://github.com/ClickHouse/ClickHouse/issues/42456). [#42469](https://github.com/ClickHouse/ClickHouse/pull/42469) ([Amos Bird](https://github.com/amosbird)). -* Fix typo in setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). 
[#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix typo in a setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). [#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)). * Fix creating a Set with wrong header when data type is LowCardinality. Closes [#42460](https://github.com/ClickHouse/ClickHouse/issues/42460). [#42579](https://github.com/ClickHouse/ClickHouse/pull/42579) ([flynn](https://github.com/ucasfl)). * `(U)Int128` and `(U)Int256` values were correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)). * Fix a bug in functions parser that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). @@ -98,19 +98,19 @@ * Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)). * Fix stack-use-after-return under ASAN build in the Create User query parser. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)). * Fix `lowerUTF8`/`upperUTF8` in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). -* Additional bound check was added to lz4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)). -* Fix rare possible hung on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). +* Additional bound check was added to LZ4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)). +* Fix rare possible hang on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). * Fix incorrect behavior with multiple disjuncts in hash join, close [#42832](https://github.com/ClickHouse/ClickHouse/issues/42832). [#42876](https://github.com/ClickHouse/ClickHouse/pull/42876) ([Vladimir C](https://github.com/vdimir)). * A null pointer will be generated when select if as from ‘three table join’ , For example, this SQL query: [#42883](https://github.com/ClickHouse/ClickHouse/pull/42883) ([zzsmdfj](https://github.com/zzsmdfj)). * Fix memory sanitizer report in Cluster Discovery, close [#42763](https://github.com/ClickHouse/ClickHouse/issues/42763). [#42905](https://github.com/ClickHouse/ClickHouse/pull/42905) ([Vladimir C](https://github.com/vdimir)). -* Improve datetime schema inference in case of empty string. [#42911](https://github.com/ClickHouse/ClickHouse/pull/42911) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve DateTime schema inference in case of empty string. [#42911](https://github.com/ClickHouse/ClickHouse/pull/42911) ([Kruglov Pavel](https://github.com/Avogar)). 
* Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). * Fix ATTACH TABLE in `PostgreSQL` database engine if the table contains DATETIME data type. Closes [#42817](https://github.com/ClickHouse/ClickHouse/issues/42817). [#42960](https://github.com/ClickHouse/ClickHouse/pull/42960) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix lambda parsing. Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)). * Fix incorrect key analysis when nullable keys appear in the middle of a hyperrectangle. This fixes [#43111](https://github.com/ClickHouse/ClickHouse/issues/43111) . [#43133](https://github.com/ClickHouse/ClickHouse/pull/43133) ([Amos Bird](https://github.com/amosbird)). * Fix several buffer over-reads in deserialization of carefully crafted aggregate function states. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). * Fix function `if` in case of NULL and const Nullable arguments. Closes [#43069](https://github.com/ClickHouse/ClickHouse/issues/43069). [#43178](https://github.com/ClickHouse/ClickHouse/pull/43178) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix decimal math overflow in parsing datetime with the 'best effort' algorithm. Closes [#43061](https://github.com/ClickHouse/ClickHouse/issues/43061). [#43180](https://github.com/ClickHouse/ClickHouse/pull/43180) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix decimal math overflow in parsing DateTime with the 'best effort' algorithm. Closes [#43061](https://github.com/ClickHouse/ClickHouse/issues/43061). [#43180](https://github.com/ClickHouse/ClickHouse/pull/43180) ([Kruglov Pavel](https://github.com/Avogar)). * The `indent` field produced by the `git-import` tool was miscalculated. See https://clickhouse.com/docs/en/getting-started/example-datasets/github/. [#43191](https://github.com/ClickHouse/ClickHouse/pull/43191) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fixed unexpected behaviour of `Interval` types with subquery and casting. [#43193](https://github.com/ClickHouse/ClickHouse/pull/43193) ([jh0x](https://github.com/jh0x)). @@ -673,7 +673,7 @@ * Support SQL standard CREATE INDEX and DROP INDEX syntax. [#35166](https://github.com/ClickHouse/ClickHouse/pull/35166) ([Jianmei Zhang](https://github.com/zhangjmruc)). * Send profile events for INSERT queries (previously only SELECT was supported). [#37391](https://github.com/ClickHouse/ClickHouse/pull/37391) ([Azat Khuzhin](https://github.com/azat)). * Implement in order aggregation (`optimize_aggregation_in_order`) for fully materialized projections. [#37469](https://github.com/ClickHouse/ClickHouse/pull/37469) ([Azat Khuzhin](https://github.com/azat)). -* Remove subprocess run for kerberos initialization. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)). +* Remove subprocess run for Kerberos initialization. Added new integration test. 
Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)). * * Add setting `multiple_joins_try_to_keep_original_names` to not rewrite identifier name on multiple JOINs rewrite, close [#34697](https://github.com/ClickHouse/ClickHouse/issues/34697). [#38149](https://github.com/ClickHouse/ClickHouse/pull/38149) ([Vladimir C](https://github.com/vdimir)). * Improved trace-visualizer UX. [#38169](https://github.com/ClickHouse/ClickHouse/pull/38169) ([Sergei Trifonov](https://github.com/serxa)). * Enable stack trace collection and query profiler for AArch64. [#38181](https://github.com/ClickHouse/ClickHouse/pull/38181) ([Maksim Kita](https://github.com/kitaisreal)). @@ -953,8 +953,8 @@ #### Upgrade Notes -* Now, background merges, mutations and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values, and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes of metrics, so we put in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL. +* Now, background merges, mutations, and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes to the metrics, so we put in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant in this area. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL. * `max_memory_usage` setting is removed from the default user profile in `users.xml`. This enables flexible memory limits for queries instead of the old rigid limit of 10 GB. * Disable `log_query_threads` setting by default. It controls the logging of statistics about every thread participating in query execution. After supporting asynchronous reads, the total number of distinct thread ids became too large, and logging into the `query_thread_log` has become too heavy. [#37077](https://github.com/ClickHouse/ClickHouse/pull/37077) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Remove function `groupArraySorted` which has a bug. [#36822](https://github.com/ClickHouse/ClickHouse/pull/36822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
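The changelog entries polished in the patch above describe several SQL-level additions in 22.11 (`ascii`, `pmod`, `formatReadableDecimalSize`, `randCanonical`, and composite `INTERVAL` parsing). The queries below are a rough illustration of what those entries refer to; they are not part of any patch in this series, they assume a server build that already carries these changes, and the commented results are expected values rather than captured output.

```sql
-- Illustrative sketch only; exact output formatting may differ by build.
SELECT ascii('A');                           -- expected 65, as in Spark's ascii()
SELECT pmod(-3, 5);                          -- expected 2: result is non-negative
SELECT formatReadableDecimalSize(1000000);   -- roughly '1.00 MB' (decimal units)
SELECT randCanonical();                      -- pseudo-random value in [0, 1)
SELECT INTERVAL '1 HOUR 1 MINUTE 1 SECOND';  -- parsed as a tuple of intervals
```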
From 5e087ae967db4b4cd1e2642689a971c8e7d69c28 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 16 Nov 2022 11:54:06 -0500 Subject: [PATCH 71/80] link to tutorial --- .../dictionaries/external-dictionaries/external-dicts.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 06b5b8a6746..9f922a2cccb 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -7,7 +7,9 @@ import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dict # Dictionaries - +:::tip Tutorial +If you are getting started with Dictionaries in ClickHouse we have a tutorial that covers that topic. Take a look [here](/docs/en/tutorial.md). +::: You can add your own dictionaries from various data sources. The source for a dictionary can be a ClickHouse table, a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”. @@ -27,6 +29,8 @@ The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tabl - Configuration parameters. - Metrics like amount of RAM allocated for the dictionary or a number of queries since the dictionary was successfully loaded. + + ## Creating a dictionary with a DDL query Dictionaries can be created with [DDL queries](../../../sql-reference/statements/create/dictionary.md), and this is the recommended method because with DDL created dictionaries: From 4016fd2efcb196a97bfc157f661acf6c165f1fa8 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 16 Nov 2022 11:58:10 -0500 Subject: [PATCH 72/80] remove changes --- CHANGELOG.md | 109 ++------------------------------------------------- 1 file changed, 3 insertions(+), 106 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 051bde44dd2..68767612892 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,4 @@ ### Table of Contents -**[ClickHouse release v22.11, 2022-11-17](#2211)**
**[ClickHouse release v22.10, 2022-10-25](#2210)**
**[ClickHouse release v22.9, 2022-09-22](#229)**
**[ClickHouse release v22.8-lts, 2022-08-18](#228)**
@@ -12,108 +11,6 @@ **[ClickHouse release v22.1, 2022-01-18](#221)**
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**
-### ClickHouse release 22.11, 2022-11-17 - -#### Backward Incompatible Change -* `JSONExtract` family of functions will now attempt to coerce to the requested type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)). - -#### New Feature -* Adds support for retries during INSERTs into ReplicatedMergeTree when a session with ClickHouse Keeper is lost. Apart from fault tolerance, it aims to provide better user experience, - avoid returning a user an error during insert if keeper is restarted (for example, due to upgrade). [#42607](https://github.com/ClickHouse/ClickHouse/pull/42607) ([Igor Nikonov](https://github.com/devcrafter)). -* Add `Hudi` and `DeltaLake` table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do), [Kseniia Sumarokova](https://github.com/kssenii)). -* Add table function `hudi` and `deltaLake`. [#43080](https://github.com/ClickHouse/ClickHouse/pull/43080) ([flynn](https://github.com/ucasfl)). -* Support for composite time intervals. 1. Add, subtract and negate operations are now available on Intervals. In the case where the types of Intervals are different, they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)). -* Added `**` glob support for recursive directory traversal of the filesystem and S3. Resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Introduce `s3_plain` disk type for write-once-read-many operations. Implement `ATTACH` of `MergeTree` table for `s3_plain` disk. [#42628](https://github.com/ClickHouse/ClickHouse/pull/42628) ([Azat Khuzhin](https://github.com/azat)). -* Added applied row-level policies to `system.query_log`. [#39819](https://github.com/ClickHouse/ClickHouse/pull/39819) ([Vladimir Chebotaryov](https://github.com/quickhouse)). -* Add four-letter command `csnp` for manually creating snapshots in ClickHouse Keeper. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)). -* Add function `ascii` like in Apache Spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)). -* Add function `pmod` which returns non-negative result based on modulo. [#42755](https://github.com/ClickHouse/ClickHouse/pull/42755) ([李扬](https://github.com/taiyang-li)). -* Add function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)). -* Add function `randCanonical`, which is similar to the `rand` function in Apache Spark or Impala. The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). [#43124](https://github.com/ClickHouse/ClickHouse/pull/43124) ([李扬](https://github.com/taiyang-li)). 
-* Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)). -* Add `min_age_to_force_merge_on_partition_only` setting to optimize old parts for the entire partition only. [#42659](https://github.com/ClickHouse/ClickHouse/pull/42659) ([Antonio Andelic](https://github.com/antonio2368)). -* Add generic implementation for arbitrary structured named collections, access type and `system.named_collections`. [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)). - -#### Performance Improvement -* `match` function can use the index if it's a condition on string prefix. This closes [#37333](https://github.com/ClickHouse/ClickHouse/issues/37333). [#42458](https://github.com/ClickHouse/ClickHouse/pull/42458) ([clarkcaoliu](https://github.com/Clark0)). -* Speed up AND and OR operators when they are sequenced. [#42214](https://github.com/ClickHouse/ClickHouse/pull/42214) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Support parallel parsing for `LineAsString` input format. This improves performance just slightly. This closes [#42502](https://github.com/ClickHouse/ClickHouse/issues/42502). [#42780](https://github.com/ClickHouse/ClickHouse/pull/42780) ([Kruglov Pavel](https://github.com/Avogar)). -* ClickHouse Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)). -* A condition like `NOT LIKE 'prefix%'` can use the primary index. [#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)). - -#### Experimental Feature -* Support type `Object` inside other types, e.g. `Array(JSON)`. [#36969](https://github.com/ClickHouse/ClickHouse/pull/36969) ([Anton Popov](https://github.com/CurtizJ)). -* Ignore MySQL binlog SAVEPOINT event for MaterializedMySQL. [#42931](https://github.com/ClickHouse/ClickHouse/pull/42931) ([zzsmdfj](https://github.com/zzsmdfj)). Handle (ignore) SAVEPOINT queries in MaterializedMySQL. [#43086](https://github.com/ClickHouse/ClickHouse/pull/43086) ([Stig Bakken](https://github.com/stigsb)). - -#### Improvement -* Trivial queries with small LIMIT will properly determine the number of estimated rows to read, so that the threshold will be checked properly. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)). -* Add support for interactive parameters in INSERT VALUES queries. [#43077](https://github.com/ClickHouse/ClickHouse/pull/43077) ([Nikolay Degterinsky](https://github.com/evillique)). -* Added new field `allow_readonly` in `system.table_functions` to allow using table functions in readonly mode. Resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. 
[#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* The `system.asynchronous_metrics` gets embedded documentation. This documentation is also exported to Prometheus. Fixed an error with the metrics about `cache` disks - they were calculated only for one arbitrary cache disk instead all of them. This closes [#7644](https://github.com/ClickHouse/ClickHouse/issues/7644). [#43194](https://github.com/ClickHouse/ClickHouse/pull/43194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Throttling algorithm changed to token bucket. [#42665](https://github.com/ClickHouse/ClickHouse/pull/42665) ([Sergei Trifonov](https://github.com/serxa)). -* Mask passwords and secret keys both in `system.query_log` and `/var/log/clickhouse-server/*.log` and also in error messages. [#42484](https://github.com/ClickHouse/ClickHouse/pull/42484) ([Vitaly Baranov](https://github.com/vitlibar)). -* Remove covered parts for fetched part (to avoid possible replication delay grows). [#39737](https://github.com/ClickHouse/ClickHouse/pull/39737) ([Azat Khuzhin](https://github.com/azat)). -* If `/dev/tty` is available, the progress in clickhouse-client and clickhouse-local will be rendered directly to the terminal, without writing to STDERR. It allows getting progress even if STDERR is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add support for `FixedString` input to base64 coding functions. [#42285](https://github.com/ClickHouse/ClickHouse/pull/42285) ([ltrk2](https://github.com/ltrk2)). -* Add columns `bytes_on_disk` and `path` to `system.detached_parts`. Closes [#42264](https://github.com/ClickHouse/ClickHouse/issues/42264). [#42303](https://github.com/ClickHouse/ClickHouse/pull/42303) ([chen](https://github.com/xiedeyantu)). -* Improve using structure from insertion table in table functions, now setting `use_structure_from_insertion_table_in_table_functions` has new possible value - `2` that means that ClickHouse will try to determine if we can use structure from insertion table or not automatically. Closes [#40028](https://github.com/ClickHouse/ClickHouse/issues/40028). [#42320](https://github.com/ClickHouse/ClickHouse/pull/42320) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix no progress indication on INSERT FROM INFILE. Closes [#42548](https://github.com/ClickHouse/ClickHouse/issues/42548). [#42634](https://github.com/ClickHouse/ClickHouse/pull/42634) ([chen](https://github.com/xiedeyantu)). -* Refactor function `tokens` to enable max tokens returned for related functions (disabled by default). [#42673](https://github.com/ClickHouse/ClickHouse/pull/42673) ([李扬](https://github.com/taiyang-li)). -* Allow to use `Date32` arguments for `formatDateTime` and `FROM_UNIXTIME` functions. [#42737](https://github.com/ClickHouse/ClickHouse/pull/42737) ([Roman Vasin](https://github.com/rvasin)). -* Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. 
[#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add `FailedAsyncInsertQuery` event metric for async inserts. [#42814](https://github.com/ClickHouse/ClickHouse/pull/42814) ([Krzysztof Góralski](https://github.com/kgoralski)). -* Implement `read-in-order` optimization on top of query plan. It is enabled by default. Set `query_plan_read_in_order = 0` to use previous AST-based version. [#42829](https://github.com/ClickHouse/ClickHouse/pull/42829) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Increase the size of upload part exponentially for backup to S3 to avoid errors about max 10 000 parts limit of the multipart upload to s3. [#42833](https://github.com/ClickHouse/ClickHouse/pull/42833) ([Vitaly Baranov](https://github.com/vitlibar)). -* When the merge task is continuously busy and the disk space is insufficient, the completely expired parts cannot be selected and dropped, resulting in insufficient disk space. My idea is that when the entire Part expires, there is no need for additional disk space to guarantee, ensure the normal execution of TTL. [#42869](https://github.com/ClickHouse/ClickHouse/pull/42869) ([zhongyuankai](https://github.com/zhongyuankai)). -* Add `oss` function and `OSS` table engine (this is convenient for users). oss is fully compatible with s3. [#43155](https://github.com/ClickHouse/ClickHouse/pull/43155) ([zzsmdfj](https://github.com/zzsmdfj)). -* Improve error reporting in the collection of OS-related info for the `system.asynchronous_metrics` table. [#43192](https://github.com/ClickHouse/ClickHouse/pull/43192) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Modify the `INFORMATION_SCHEMA` tables in a way so that ClickHouse can connect to itself using the MySQL compatibility protocol. Add columns instead of aliases (related to [#9769](https://github.com/ClickHouse/ClickHouse/issues/9769)). It will improve the compatibility with various MySQL clients. [#43198](https://github.com/ClickHouse/ClickHouse/pull/43198) ([Filatenkov Artur](https://github.com/FArthur-cmd)). -* Add some functions for compatibility with PowerBI, when it connects using MySQL protocol [#42612](https://github.com/ClickHouse/ClickHouse/pull/42612) ([Filatenkov Artur](https://github.com/FArthur-cmd)). -* Better usability for Dashboard on changes [#42872](https://github.com/ClickHouse/ClickHouse/pull/42872) ([Vladimir C](https://github.com/vdimir)). - -#### Build/Testing/Packaging Improvement -* Run SQLancer for each pull request and commit to master. [SQLancer](https://github.com/sqlancer/sqlancer) is an OpenSource fuzzer that focuses on automatic detection of logical bugs. [#42397](https://github.com/ClickHouse/ClickHouse/pull/42397) ([Ilya Yatsishin](https://github.com/qoega)). -* Update to latest zlib-ng. [#42463](https://github.com/ClickHouse/ClickHouse/pull/42463) ([Boris Kuschel](https://github.com/bkuschel)). -* Add support for testing ClickHouse server with Jepsen. By the way, we already have support for testing ClickHouse Keeper with Jepsen. This pull request extends it to Replicated tables. [#42619](https://github.com/ClickHouse/ClickHouse/pull/42619) ([Antonio Andelic](https://github.com/antonio2368)). -* Use https://github.com/matus-chochlik/ctcache for clang-tidy results caching. [#42913](https://github.com/ClickHouse/ClickHouse/pull/42913) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. 
The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Remove some libraries from Ubuntu Docker image. [#42622](https://github.com/ClickHouse/ClickHouse/pull/42622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### Bug Fix (user-visible misbehavior in official stable or prestable release) - -* Updated normaliser to clone the alias ast. Resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix race for backup of tables in `Lazy` databases. [#43104](https://github.com/ClickHouse/ClickHouse/pull/43104) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix for `skip_unavailable_shards`: it did not work with the `s3Cluster` table function. [#43131](https://github.com/ClickHouse/ClickHouse/pull/43131) ([chen](https://github.com/xiedeyantu)). -* Fix schema inference in `s3Cluster` and improvement in `hdfsCluster`. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix retries while reading from URL table engines / table function. (retriable errors could be retries more times than needed, non-retriable errors resulted in failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)). -* A segmentation fault related to DNS & c-ares has been reported and fixed. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). -* Fix `LOGICAL_ERROR` `Arguments of 'plus' have incorrect data types` which may happen in PK analysis (monotonicity check). Fix invalid PK analysis for monotonic binary functions with first constant argument. [#42410](https://github.com/ClickHouse/ClickHouse/pull/42410) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix incorrect key analysis when key types cannot be inside Nullable. This fixes [#42456](https://github.com/ClickHouse/ClickHouse/issues/42456). [#42469](https://github.com/ClickHouse/ClickHouse/pull/42469) ([Amos Bird](https://github.com/amosbird)). -* Fix typo in a setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). [#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix creating a Set with wrong header when data type is LowCardinality. Closes [#42460](https://github.com/ClickHouse/ClickHouse/issues/42460). [#42579](https://github.com/ClickHouse/ClickHouse/pull/42579) ([flynn](https://github.com/ucasfl)). -* `(U)Int128` and `(U)Int256` values were correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix a bug in functions parser that could have led to a segmentation fault. 
[#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix the locking in `truncate table`. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)). -* Fix possible crash in `web` disks when file does not exist (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). -* Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)). -* Fix stack-use-after-return under ASAN build in the Create User query parser. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix `lowerUTF8`/`upperUTF8` in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). -* Additional bound check was added to LZ4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)). -* Fix rare possible hang on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). -* Fix incorrect behavior with multiple disjuncts in hash join, close [#42832](https://github.com/ClickHouse/ClickHouse/issues/42832). [#42876](https://github.com/ClickHouse/ClickHouse/pull/42876) ([Vladimir C](https://github.com/vdimir)). -* A null pointer will be generated when select if as from ‘three table join’ , For example, this SQL query: [#42883](https://github.com/ClickHouse/ClickHouse/pull/42883) ([zzsmdfj](https://github.com/zzsmdfj)). -* Fix memory sanitizer report in Cluster Discovery, close [#42763](https://github.com/ClickHouse/ClickHouse/issues/42763). [#42905](https://github.com/ClickHouse/ClickHouse/pull/42905) ([Vladimir C](https://github.com/vdimir)). -* Improve DateTime schema inference in case of empty string. [#42911](https://github.com/ClickHouse/ClickHouse/pull/42911) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). -* Fix ATTACH TABLE in `PostgreSQL` database engine if the table contains DATETIME data type. Closes [#42817](https://github.com/ClickHouse/ClickHouse/issues/42817). [#42960](https://github.com/ClickHouse/ClickHouse/pull/42960) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix lambda parsing. Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix incorrect key analysis when nullable keys appear in the middle of a hyperrectangle. This fixes [#43111](https://github.com/ClickHouse/ClickHouse/issues/43111) . [#43133](https://github.com/ClickHouse/ClickHouse/pull/43133) ([Amos Bird](https://github.com/amosbird)). 
-* Fix several buffer over-reads in deserialization of carefully crafted aggregate function states. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). -* Fix function `if` in case of NULL and const Nullable arguments. Closes [#43069](https://github.com/ClickHouse/ClickHouse/issues/43069). [#43178](https://github.com/ClickHouse/ClickHouse/pull/43178) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix decimal math overflow in parsing DateTime with the 'best effort' algorithm. Closes [#43061](https://github.com/ClickHouse/ClickHouse/issues/43061). [#43180](https://github.com/ClickHouse/ClickHouse/pull/43180) ([Kruglov Pavel](https://github.com/Avogar)). -* The `indent` field produced by the `git-import` tool was miscalculated. See https://clickhouse.com/docs/en/getting-started/example-datasets/github/. [#43191](https://github.com/ClickHouse/ClickHouse/pull/43191) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fixed unexpected behaviour of `Interval` types with subquery and casting. [#43193](https://github.com/ClickHouse/ClickHouse/pull/43193) ([jh0x](https://github.com/jh0x)). - ### ClickHouse release 22.10, 2022-10-26 #### Backward Incompatible Change @@ -673,7 +570,7 @@ * Support SQL standard CREATE INDEX and DROP INDEX syntax. [#35166](https://github.com/ClickHouse/ClickHouse/pull/35166) ([Jianmei Zhang](https://github.com/zhangjmruc)). * Send profile events for INSERT queries (previously only SELECT was supported). [#37391](https://github.com/ClickHouse/ClickHouse/pull/37391) ([Azat Khuzhin](https://github.com/azat)). * Implement in order aggregation (`optimize_aggregation_in_order`) for fully materialized projections. [#37469](https://github.com/ClickHouse/ClickHouse/pull/37469) ([Azat Khuzhin](https://github.com/azat)). -* Remove subprocess run for Kerberos initialization. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)). +* Remove subprocess run for kerberos initialization. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)). * * Add setting `multiple_joins_try_to_keep_original_names` to not rewrite identifier name on multiple JOINs rewrite, close [#34697](https://github.com/ClickHouse/ClickHouse/issues/34697). [#38149](https://github.com/ClickHouse/ClickHouse/pull/38149) ([Vladimir C](https://github.com/vdimir)). * Improved trace-visualizer UX. [#38169](https://github.com/ClickHouse/ClickHouse/pull/38169) ([Sergei Trifonov](https://github.com/serxa)). * Enable stack trace collection and query profiler for AArch64. [#38181](https://github.com/ClickHouse/ClickHouse/pull/38181) ([Maksim Kita](https://github.com/kitaisreal)). @@ -953,8 +850,8 @@ #### Upgrade Notes -* Now, background merges, mutations, and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes to the metrics, so we put in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
-* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant in this area. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL. +* Now, background merges, mutations and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values, and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes of metrics, so we put in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL. * `max_memory_usage` setting is removed from the default user profile in `users.xml`. This enables flexible memory limits for queries instead of the old rigid limit of 10 GB. * Disable `log_query_threads` setting by default. It controls the logging of statistics about every thread participating in query execution. After supporting asynchronous reads, the total number of distinct thread ids became too large, and logging into the `query_thread_log` has become too heavy. [#37077](https://github.com/ClickHouse/ClickHouse/pull/37077) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Remove function `groupArraySorted` which has a bug. [#36822](https://github.com/ClickHouse/ClickHouse/pull/36822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
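Regarding the `max_memory_usage` upgrade note above: users who prefer the old fixed per-query limit can add it back to the default profile themselves. A minimal, illustrative `users.xml` fragment (the 10000000000-byte value mirrors the old 10 GB default; the exact value is an assumption, adjust to taste):

```xml
<clickhouse>
    <profiles>
        <default>
            <!-- Restore the previous rigid per-query memory limit (~10 GB) -->
            <max_memory_usage>10000000000</max_memory_usage>
        </default>
    </profiles>
</clickhouse>
```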
From 9011a18234f26914104a3762e3e7e8a2e5f99661 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 16 Nov 2022 22:33:48 +0300 Subject: [PATCH 73/80] Revert "S3 request per second rate throttling" --- .../engines/table-engines/integrations/s3.md | 5 -- .../mergetree-family/mergetree.md | 4 -- src/Backups/BackupIO_S3.cpp | 27 +++++---- src/Backups/BackupIO_S3.h | 5 +- src/Common/ProfileEvents.cpp | 2 +- src/Common/Throttler.cpp | 2 + src/Common/Throttler.h | 2 - src/Coordination/KeeperSnapshotManagerS3.cpp | 12 ++-- src/Core/Settings.h | 4 -- .../ObjectStorages/S3/S3ObjectStorage.cpp | 23 ++++---- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 6 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 38 ++++--------- src/Disks/ObjectStorages/S3/diskSettings.h | 2 +- src/Disks/ObjectStorages/S3/parseConfig.h | 27 +++++++++ .../ObjectStorages/S3/registerDiskS3.cpp | 13 +++-- src/IO/ReadBufferFromS3.cpp | 10 ++-- src/IO/ReadBufferFromS3.h | 11 ++-- src/IO/S3/PocoHTTPClient.cpp | 26 +-------- src/IO/S3/PocoHTTPClient.h | 17 +----- src/IO/S3/tests/gtest_aws_s3_client.cpp | 8 +-- src/IO/S3Common.cpp | 31 ++-------- src/IO/S3Common.h | 5 +- src/IO/WriteBufferFromS3.cpp | 20 +++---- src/IO/WriteBufferFromS3.h | 4 +- .../ExternalDataSourceConfiguration.h | 2 +- src/Storages/StorageDelta.cpp | 6 +- src/Storages/StorageHudi.cpp | 2 +- src/Storages/StorageS3.cpp | 56 +++++++++---------- src/Storages/StorageS3.h | 13 ++--- src/Storages/StorageS3Cluster.cpp | 2 +- src/Storages/StorageS3Settings.cpp | 47 +++++----------- src/Storages/StorageS3Settings.h | 19 +++---- .../02477_s3_request_throttler.reference | 2 - .../02477_s3_request_throttler.sql | 26 --------- 34 files changed, 173 insertions(+), 306 deletions(-) create mode 100644 src/Disks/ObjectStorages/S3/parseConfig.h delete mode 100644 tests/queries/0_stateless/02477_s3_request_throttler.reference delete mode 100644 tests/queries/0_stateless/02477_s3_request_throttler.sql diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 484fd265c3d..db983ab9c68 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -127,10 +127,6 @@ The following settings can be set before query execution or placed into configur - `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`. - `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`. - `s3_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. -- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). -- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. -- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). -- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. 
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. @@ -146,7 +142,6 @@ The following settings can be specified in configuration file for given endpoint - `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional. - `max_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. Optional. -- `max_put_rps`, `max_put_burst`, `max_get_rps` and `max_get_burst` - Throttling settings (see description above) to use for specific endpoint instead of per query. Optional. **Example:** diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7614a09c018..ce6cec079a3 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -940,10 +940,6 @@ Optional parameters: - `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks//cache/`. - `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. -- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). -- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. -- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). -- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. 
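For orientation, a minimal sketch of how a few of the per-disk options listed above are spelled in the storage configuration; the disk name, endpoint, and cache path are placeholders rather than values taken from this patch:

```xml
<clickhouse>
    <storage_configuration>
        <disks>
            <s3_disk>
                <type>s3</type>
                <!-- Placeholder endpoint; replace with your bucket URL -->
                <endpoint>https://s3.example.com/my-bucket/data/</endpoint>
                <!-- Optional parameters described above -->
                <skip_access_check>false</skip_access_check>
                <cache_path>/var/lib/clickhouse/disks/s3_disk/cache/</cache_path>
            </s3_disk>
        </disks>
    </storage_configuration>
</clickhouse>
```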
S3 disk can be configured as `main` or `cold` storage: ``` xml diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 8342749e230..f7f7643a6e3 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -46,7 +46,7 @@ namespace context->getRemoteHostFilter(), static_cast(context->getGlobalContext()->getSettingsRef().s3_max_redirects), context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, - /* for_disk_s3 = */ false, /* get_request_throttler = */ {}, /* put_request_throttler = */ {}); + /* for_disk_s3 = */ false); client_configuration.endpointOverride = s3_uri.endpoint; client_configuration.maxConnections = static_cast(context->getSettingsRef().s3_max_connections); @@ -86,10 +86,9 @@ BackupReaderS3::BackupReaderS3( const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_) : s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) + , max_single_read_retries(context_->getSettingsRef().s3_max_single_read_retries) , read_settings(context_->getReadSettings()) - , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) { - request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint } DataSourceDescription BackupReaderS3::getDataSourceDescription() const @@ -116,7 +115,7 @@ UInt64 BackupReaderS3::getFileSize(const String & file_name) std::unique_ptr BackupReaderS3::readFile(const String & file_name) { return std::make_unique( - client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, request_settings, read_settings); + client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, max_single_read_retries, read_settings); } @@ -124,12 +123,12 @@ BackupWriterS3::BackupWriterS3( const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_) : s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) + , max_single_read_retries(context_->getSettingsRef().s3_max_single_read_retries) , read_settings(context_->getReadSettings()) - , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) + , rw_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).rw_settings) , log(&Poco::Logger::get("BackupWriterS3")) { - request_settings.updateFromSettingsIfEmpty(context_->getSettingsRef()); - request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint + rw_settings.updateFromSettingsIfEmpty(context_->getSettingsRef()); } DataSourceDescription BackupWriterS3::getDataSourceDescription() const @@ -217,7 +216,7 @@ void BackupWriterS3::copyObjectMultipartImpl( std::vector part_tags; size_t position = 0; - size_t upload_part_size = request_settings.min_upload_part_size; + size_t upload_part_size = rw_settings.min_upload_part_size; for (size_t part_number = 1; position < size; ++part_number) { @@ -249,10 +248,10 @@ void BackupWriterS3::copyObjectMultipartImpl( position = next_position; - if (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0) + if (part_number % rw_settings.upload_part_size_multiply_parts_count_threshold == 0) { - upload_part_size *= request_settings.upload_part_size_multiply_factor; - upload_part_size = 
std::min(upload_part_size, request_settings.max_upload_part_size); + upload_part_size *= rw_settings.upload_part_size_multiply_factor; + upload_part_size = std::min(upload_part_size, rw_settings.max_upload_part_size); } } @@ -295,7 +294,7 @@ void BackupWriterS3::copyFileNative(DiskPtr from_disk, const String & file_name_ auto file_path = fs::path(s3_uri.key) / file_name_to; auto head = requestObjectHeadData(source_bucket, objects[0].absolute_path).GetResult(); - if (static_cast(head.GetContentLength()) < request_settings.max_single_operation_copy_size) + if (static_cast(head.GetContentLength()) < rw_settings.max_single_operation_copy_size) { copyObjectImpl( source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, head); @@ -332,7 +331,7 @@ bool BackupWriterS3::fileContentsEqual(const String & file_name, const String & try { auto in = std::make_unique( - client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, request_settings, read_settings); + client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, max_single_read_retries, read_settings); String actual_file_contents(expected_file_contents.size(), ' '); return (in->read(actual_file_contents.data(), actual_file_contents.size()) == actual_file_contents.size()) && (actual_file_contents == expected_file_contents) && in->eof(); @@ -350,7 +349,7 @@ std::unique_ptr BackupWriterS3::writeFile(const String & file_name) client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, - request_settings, + rw_settings, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, threadPoolCallbackRunner(IOThreadPool::get(), "BackupWriterS3")); diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index deaf7979ff8..b52de23e262 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -39,8 +39,8 @@ public: private: S3::URI s3_uri; std::shared_ptr client; + UInt64 max_single_read_retries; ReadSettings read_settings; - S3Settings::RequestSettings request_settings; }; @@ -81,8 +81,9 @@ private: S3::URI s3_uri; std::shared_ptr client; + UInt64 max_single_read_retries; ReadSettings read_settings; - S3Settings::RequestSettings request_settings; + S3Settings::ReadWriteSettings rw_settings; Poco::Logger * log; }; diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 90d24ec027e..e30a6bb6aaf 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -62,7 +62,7 @@ M(NetworkSendElapsedMicroseconds, "Total time spent waiting for data to send to network or sending data to network. Only ClickHouse-related network interaction is included, not by 3rd party libraries..") \ M(NetworkReceiveBytes, "Total number of bytes received from network. Only ClickHouse-related network interaction is included, not by 3rd party libraries.") \ M(NetworkSendBytes, "Total number of bytes send to network. Only ClickHouse-related network interaction is included, not by 3rd party libraries.") \ - M(ThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_network_bandwidth' and other throttling settings.") \ + M(ThrottlerSleepMicroseconds, "Total time a query was sleeping to conform the 'max_network_bandwidth' setting.") \ \ M(QueryMaskingRulesMatch, "Number of times query masking rules was successfully matched.") \ \ diff --git a/src/Common/Throttler.cpp b/src/Common/Throttler.cpp index 79625d4894c..b38777efc03 100644 --- a/src/Common/Throttler.cpp +++ b/src/Common/Throttler.cpp @@ -20,6 +20,8 @@ namespace ErrorCodes /// Just 10^9. 
static constexpr auto NS = 1000000000UL; +static const size_t default_burst_seconds = 1; + Throttler::Throttler(size_t max_speed_, const std::shared_ptr & parent_) : max_speed(max_speed_) , max_burst(max_speed_ * default_burst_seconds) diff --git a/src/Common/Throttler.h b/src/Common/Throttler.h index 708e9b939fa..9b6eff13506 100644 --- a/src/Common/Throttler.h +++ b/src/Common/Throttler.h @@ -17,8 +17,6 @@ namespace DB class Throttler { public: - static const size_t default_burst_seconds = 1; - Throttler(size_t max_speed_, size_t max_burst_, const std::shared_ptr & parent_ = nullptr) : max_speed(max_speed_), max_burst(max_burst_), limit_exceeded_exception_message(""), tokens(max_burst), parent(parent_) {} diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index cc837f5f496..2e19d496407 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -93,7 +93,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo auth_settings.region, RemoteHostFilter(), s3_max_redirects, enable_s3_requests_logging, - /* for_disk_s3 = */ false, /* get_request_throttler = */ {}, /* put_request_throttler = */ {}); + /* for_disk_s3 = */ false); client_configuration.endpointOverride = new_uri.endpoint; @@ -135,8 +135,8 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa if (s3_client == nullptr) return; - S3Settings::RequestSettings request_settings_1; - request_settings_1.upload_part_size_multiply_parts_count_threshold = 10000; + S3Settings::ReadWriteSettings read_write_settings; + read_write_settings.upload_part_size_multiply_parts_count_threshold = 10000; const auto create_writer = [&](const auto & key) { @@ -145,7 +145,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa s3_client->client, s3_client->uri.bucket, key, - request_settings_1 + read_write_settings }; }; @@ -194,15 +194,13 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa lock_writer.finalize(); // We read back the written UUID, if it's the same we can upload the file - S3Settings::RequestSettings request_settings_2; - request_settings_2.max_single_read_retries = 1; ReadBufferFromS3 lock_reader { s3_client->client, s3_client->uri.bucket, lock_file, "", - request_settings_2, + 1, {} }; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index bcaea96512d..83252b6f0a9 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -90,10 +90,6 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ - M(UInt64, s3_max_get_rps, 0, "Limit on S3 GET request per second rate before throttling. Zero means unlimited.", 0) \ - M(UInt64, s3_max_get_burst, 0, "Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_get_rps`", 0) \ - M(UInt64, s3_max_put_rps, 0, "Limit on S3 PUT request per second rate before throttling. Zero means unlimited.", 0) \ - M(UInt64, s3_max_put_burst, 0, "Max number of requests that can be issued simultaneously before hitting request per second limit. 
By default (0) equals to `s3_max_put_rps`", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 11c9345d4a1..099a7d458d0 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -175,7 +175,7 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT bucket, path, version_id, - settings_ptr->request_settings, + settings_ptr->s3_settings.max_single_read_retries, disk_read_settings, /* use_external_buffer */true, /* offset */0, @@ -212,7 +212,7 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT bucket, object.absolute_path, version_id, - settings_ptr->request_settings, + settings_ptr->s3_settings.max_single_read_retries, patchSettings(read_settings)); } @@ -238,7 +238,7 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN client.get(), bucket, object.absolute_path, - settings_ptr->request_settings, + settings_ptr->s3_settings, attributes, buf_size, std::move(scheduler), @@ -489,7 +489,7 @@ void S3ObjectStorage::copyObjectImpl( throwIfError(outcome); auto settings_ptr = s3_settings.get(); - if (settings_ptr->request_settings.check_objects_after_upload) + if (settings_ptr->s3_settings.check_objects_after_upload) { auto object_head = requestObjectHeadData(dst_bucket, dst_key); if (!object_head.IsSuccess()) @@ -533,7 +533,7 @@ void S3ObjectStorage::copyObjectMultipartImpl( std::vector part_tags; - size_t upload_part_size = settings_ptr->request_settings.min_upload_part_size; + size_t upload_part_size = settings_ptr->s3_settings.min_upload_part_size; for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size) { ProfileEvents::increment(ProfileEvents::S3UploadPartCopy); @@ -586,7 +586,7 @@ void S3ObjectStorage::copyObjectMultipartImpl( throwIfError(outcome); } - if (settings_ptr->request_settings.check_objects_after_upload) + if (settings_ptr->s3_settings.check_objects_after_upload) { auto object_head = requestObjectHeadData(dst_bucket, dst_key); if (!object_head.IsSuccess()) @@ -643,20 +643,17 @@ void S3ObjectStorage::startup() void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { - auto new_s3_settings = getSettings(config, config_prefix, context); - auto new_client = getClient(config, config_prefix, context, *new_s3_settings); - s3_settings.set(std::move(new_s3_settings)); - client.set(std::move(new_client)); + s3_settings.set(getSettings(config, config_prefix, context)); + client.set(getClient(config, config_prefix, context)); applyRemoteThrottlingSettings(context); } std::unique_ptr S3ObjectStorage::cloneObjectStorage( const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { - auto new_s3_settings = getSettings(config, config_prefix, context); - auto new_client = getClient(config, config_prefix, context, *new_s3_settings); return std::make_unique( - std::move(new_client), std::move(new_s3_settings), + getClient(config, config_prefix, context), + 
getSettings(config, config_prefix, context), version_id, s3_capabilities, new_namespace, S3::URI(Poco::URI(config.getString(config_prefix + ".endpoint"))).endpoint); } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 57ceaa679e0..447ca034aac 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -23,17 +23,17 @@ struct S3ObjectStorageSettings S3ObjectStorageSettings() = default; S3ObjectStorageSettings( - const S3Settings::RequestSettings & request_settings_, + const S3Settings::ReadWriteSettings & s3_settings_, uint64_t min_bytes_for_seek_, int32_t list_object_keys_size_, int32_t objects_chunk_size_to_delete_) - : request_settings(request_settings_) + : s3_settings(s3_settings_) , min_bytes_for_seek(min_bytes_for_seek_) , list_object_keys_size(list_object_keys_size_) , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) {} - S3Settings::RequestSettings request_settings; + S3Settings::ReadWriteSettings s3_settings; uint64_t min_bytes_for_seek; int32_t list_object_keys_size; diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index ca2e9d04926..e61987163d2 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -33,26 +32,17 @@ namespace ErrorCodes std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { - const Settings & settings = context->getSettingsRef(); - S3Settings::RequestSettings request_settings; - request_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", settings.s3_max_single_read_retries); - request_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", settings.s3_min_upload_part_size); - request_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", settings.s3_upload_part_size_multiply_factor); - request_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", settings.s3_upload_part_size_multiply_parts_count_threshold); - request_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", settings.s3_max_single_part_upload_size); - request_settings.check_objects_after_upload = config.getUInt64(config_prefix + ".s3_check_objects_after_upload", settings.s3_check_objects_after_upload); - request_settings.max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".s3_max_unexpected_write_error_retries", settings.s3_max_unexpected_write_error_retries); - - // NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. - if (UInt64 max_get_rps = config.getUInt64(config_prefix + ".s3_max_get_rps", settings.s3_max_get_rps)) - request_settings.get_request_throttler = std::make_shared( - max_get_rps, config.getUInt64(config_prefix + ".s3_max_get_burst", settings.s3_max_get_burst ? 
settings.s3_max_get_burst : Throttler::default_burst_seconds * max_get_rps)); - if (UInt64 max_put_rps = config.getUInt64(config_prefix + ".s3_max_put_rps", settings.s3_max_put_rps)) - request_settings.put_request_throttler = std::make_shared( - max_put_rps, config.getUInt64(config_prefix + ".s3_max_put_burst", settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * max_put_rps)); + S3Settings::ReadWriteSettings rw_settings; + rw_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", context->getSettingsRef().s3_max_single_read_retries); + rw_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", context->getSettingsRef().s3_min_upload_part_size); + rw_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", context->getSettingsRef().s3_upload_part_size_multiply_factor); + rw_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold); + rw_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", context->getSettingsRef().s3_max_single_part_upload_size); + rw_settings.check_objects_after_upload = config.getUInt64(config_prefix + ".s3_check_objects_after_upload", context->getSettingsRef().s3_check_objects_after_upload); + rw_settings.max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".s3_max_unexpected_write_error_retries", context->getSettingsRef().s3_max_unexpected_write_error_retries); return std::make_unique( - request_settings, + rw_settings, config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getInt(config_prefix + ".list_object_keys_size", 1000), config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000)); @@ -122,20 +112,14 @@ std::shared_ptr getProxyConfiguration(const String & pre } -std::unique_ptr getClient( - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - ContextPtr context, - const S3ObjectStorageSettings & settings) +std::unique_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( config.getString(config_prefix + ".region", ""), context->getRemoteHostFilter(), static_cast(context->getGlobalContext()->getSettingsRef().s3_max_redirects), context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, - /* for_disk_s3 = */ true, - settings.request_settings.get_request_throttler, - settings.request_settings.put_request_throttler); + /* for_disk_s3 = */ true); S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); if (uri.key.back() != '/') diff --git a/src/Disks/ObjectStorages/S3/diskSettings.h b/src/Disks/ObjectStorages/S3/diskSettings.h index 04eb7aced8e..05ba8819f83 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.h +++ b/src/Disks/ObjectStorages/S3/diskSettings.h @@ -22,7 +22,7 @@ struct S3ObjectStorageSettings; std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); -std::unique_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, const S3ObjectStorageSettings & 
settings); +std::unique_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); } diff --git a/src/Disks/ObjectStorages/S3/parseConfig.h b/src/Disks/ObjectStorages/S3/parseConfig.h new file mode 100644 index 00000000000..1defc673c2e --- /dev/null +++ b/src/Disks/ObjectStorages/S3/parseConfig.h @@ -0,0 +1,27 @@ +#pragma once + +#include "config.h" + +#if USE_AWS_S3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + + +std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + +std::shared_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + + +} diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index e73accbb956..e09aef22122 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -130,16 +130,21 @@ void registerDiskS3(DiskFactory & factory) chassert(type == "s3" || type == "s3_plain"); MetadataStoragePtr metadata_storage; - auto settings = getSettings(config, config_prefix, context); - auto client = getClient(config, config_prefix, context, *settings); if (type == "s3_plain") { - s3_storage = std::make_shared(std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint); + s3_storage = std::make_shared( + getClient(config, config_prefix, context), + getSettings(config, config_prefix, context), + uri.version_id, s3_capabilities, uri.bucket, uri.endpoint); metadata_storage = std::make_shared(s3_storage, uri.key); } else { - s3_storage = std::make_shared(std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint); + s3_storage = std::make_shared( + getClient(config, config_prefix, context), + getSettings(config, config_prefix, context), + uri.version_id, s3_capabilities, uri.bucket, uri.endpoint); + auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); metadata_storage = std::make_shared(metadata_disk, uri.key); } diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index c14fbecf223..c49941b025d 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -45,7 +45,7 @@ ReadBufferFromS3::ReadBufferFromS3( const String & bucket_, const String & key_, const String & version_id_, - const S3Settings::RequestSettings & request_settings_, + UInt64 max_single_read_retries_, const ReadSettings & settings_, bool use_external_buffer_, size_t offset_, @@ -56,7 +56,7 @@ ReadBufferFromS3::ReadBufferFromS3( , bucket(bucket_) , key(key_) , version_id(version_id_) - , request_settings(request_settings_) + , max_single_read_retries(max_single_read_retries_) , offset(offset_) , read_until_position(read_until_position_) , read_settings(settings_) @@ -105,7 +105,7 @@ bool ReadBufferFromS3::nextImpl() } size_t sleep_time_with_backoff_milliseconds = 100; - for (size_t attempt = 0; attempt < request_settings.max_single_read_retries && !next_result; ++attempt) + for (size_t attempt = 0; (attempt < max_single_read_retries) && !next_result; ++attempt) { Stopwatch watch; try @@ -166,7 +166,7 @@ bool ReadBufferFromS3::nextImpl() attempt, e.message()); - if (attempt + 1 == request_settings.max_single_read_retries) + if (attempt + 1 == max_single_read_retries) throw; /// Pause before next 
attempt. @@ -349,7 +349,7 @@ SeekableReadBufferPtr ReadBufferS3Factory::getReader() bucket, key, version_id, - request_settings, + s3_max_single_read_retries, read_settings, false /*use_external_buffer*/, next_range->first, diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 17b13bf7d62..cc836bba495 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include "config.h" #if USE_AWS_S3 @@ -34,7 +33,7 @@ private: String bucket; String key; String version_id; - const S3Settings::RequestSettings request_settings; + UInt64 max_single_read_retries; /// These variables are atomic because they can be used for `logging only` /// (where it is not important to get consistent result) @@ -53,7 +52,7 @@ public: const String & bucket_, const String & key_, const String & version_id_, - const S3Settings::RequestSettings & request_settings_, + UInt64 max_single_read_retries_, const ReadSettings & settings_, bool use_external_buffer = false, size_t offset_ = 0, @@ -101,7 +100,7 @@ public: const String & version_id_, size_t range_step_, size_t object_size_, - const S3Settings::RequestSettings & request_settings_, + UInt64 s3_max_single_read_retries_, const ReadSettings & read_settings_) : client_ptr(client_ptr_) , bucket(bucket_) @@ -111,7 +110,7 @@ public: , range_generator(object_size_, range_step_) , range_step(range_step_) , object_size(object_size_) - , request_settings(request_settings_) + , s3_max_single_read_retries(s3_max_single_read_retries_) { assert(range_step > 0); assert(range_step < object_size); @@ -136,7 +135,7 @@ private: size_t range_step; size_t object_size; - const S3Settings::RequestSettings request_settings; + UInt64 s3_max_single_read_retries; }; } diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 33dd3250c9f..7d053bebe4a 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -11,7 +11,6 @@ #include #include -#include #include #include #include @@ -77,16 +76,12 @@ PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, bool enable_s3_requests_logging_, - bool for_disk_s3_, - const ThrottlerPtr & get_request_throttler_, - const ThrottlerPtr & put_request_throttler_) + bool for_disk_s3_) : force_region(force_region_) , remote_host_filter(remote_host_filter_) , s3_max_redirects(s3_max_redirects_) , enable_s3_requests_logging(enable_s3_requests_logging_) , for_disk_s3(for_disk_s3_) - , get_request_throttler(get_request_throttler_) - , put_request_throttler(put_request_throttler_) { } @@ -133,8 +128,6 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config , s3_max_redirects(client_configuration.s3_max_redirects) , enable_s3_requests_logging(client_configuration.enable_s3_requests_logging) , for_disk_s3(client_configuration.for_disk_s3) - , get_request_throttler(client_configuration.get_request_throttler) - , put_request_throttler(client_configuration.put_request_throttler) , extra_headers(client_configuration.extra_headers) { } @@ -252,23 +245,6 @@ void PocoHTTPClient::makeRequestInternal( if (enable_s3_requests_logging) LOG_TEST(log, "Make request to: {}", uri); - switch (request.GetMethod()) - { - case Aws::Http::HttpMethod::HTTP_GET: - case Aws::Http::HttpMethod::HTTP_HEAD: - if (get_request_throttler) - get_request_throttler->add(1); - break; - case Aws::Http::HttpMethod::HTTP_PUT: - case Aws::Http::HttpMethod::HTTP_POST: - case 
Aws::Http::HttpMethod::HTTP_PATCH: - if (put_request_throttler) - put_request_throttler->add(1); - break; - case Aws::Http::HttpMethod::HTTP_DELETE: - break; // Not throttled - } - addMetric(request, S3MetricType::Count); CurrentMetrics::Increment metric_increment{CurrentMetrics::S3Requests}; diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index ed6e1793c2c..5649638285d 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -8,7 +8,6 @@ #if USE_AWS_S3 #include -#include #include #include #include @@ -49,8 +48,6 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration unsigned int s3_max_redirects; bool enable_s3_requests_logging; bool for_disk_s3; - ThrottlerPtr get_request_throttler; - ThrottlerPtr put_request_throttler; HeaderCollection extra_headers; void updateSchemeAndRegion(); @@ -63,9 +60,7 @@ private: const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, bool enable_s3_requests_logging_, - bool for_disk_s3_, - const ThrottlerPtr & get_request_throttler_, - const ThrottlerPtr & put_request_throttler_ + bool for_disk_s3_ ); /// Constructor of Aws::Client::ClientConfiguration must be called after AWS SDK initialization. @@ -159,16 +154,6 @@ private: unsigned int s3_max_redirects; bool enable_s3_requests_logging; bool for_disk_s3; - - /// Limits get request per second rate for GET, SELECT and all other requests, excluding throttled by put throttler - /// (i.e. throttles GetObject, HeadObject) - ThrottlerPtr get_request_throttler; - - /// Limits put request per second rate for PUT, COPY, POST, LIST requests - /// (i.e. throttles PutObject, CopyObject, ListObjects, CreateMultipartUpload, UploadPartCopy, UploadPart, CompleteMultipartUpload) - /// NOTE: DELETE and CANCEL requests are not throttled by either put or get throttler - ThrottlerPtr put_request_throttler; - const HeaderCollection extra_headers; }; diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index 2d298ca5df2..9b2a65d84fc 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -88,9 +88,7 @@ TEST(IOTestAwsS3Client, AppendExtraSSECHeaders) remote_host_filter, s3_max_redirects, enable_s3_requests_logging, - /* for_disk_s3 = */ false, - /* get_request_throttler = */ {}, - /* put_request_throttler = */ {} + /* for_disk_s3 = */ false ); client_configuration.endpointOverride = uri.endpoint; @@ -115,14 +113,12 @@ TEST(IOTestAwsS3Client, AppendExtraSSECHeaders) ASSERT_TRUE(client); DB::ReadSettings read_settings; - DB::S3Settings::RequestSettings request_settings; - request_settings.max_single_read_retries = max_single_read_retries; DB::ReadBufferFromS3 read_buffer( client, uri.bucket, uri.key, version_id, - request_settings, + max_single_read_retries, read_settings ); diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 5af09275df4..859f5ce796b 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -573,14 +573,7 @@ public: /// AWS API tries credentials providers one by one. Some of providers (like ProfileConfigFileAWSCredentialsProvider) can be /// quite verbose even if nobody configured them. So we use our provider first and only after it use default providers. 
{ - DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration( - configuration.region, - configuration.remote_host_filter, - configuration.s3_max_redirects, - configuration.enable_s3_requests_logging, - configuration.for_disk_s3, - configuration.get_request_throttler, - configuration.put_request_throttler); + DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging, configuration.for_disk_s3); AddProvider(std::make_shared(aws_client_configuration)); } @@ -617,14 +610,7 @@ public: } else if (Aws::Utils::StringUtils::ToLower(ec2_metadata_disabled.c_str()) != "true") { - DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration( - configuration.region, - configuration.remote_host_filter, - configuration.s3_max_redirects, - configuration.enable_s3_requests_logging, - configuration.for_disk_s3, - configuration.get_request_throttler, - configuration.put_request_throttler); + DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging, configuration.for_disk_s3); /// See MakeDefaultHttpResourceClientConfiguration(). /// This is part of EC2 metadata client, but unfortunately it can't be accessed from outside @@ -745,18 +731,9 @@ namespace S3 const RemoteHostFilter & remote_host_filter, unsigned int s3_max_redirects, bool enable_s3_requests_logging, - bool for_disk_s3, - const ThrottlerPtr & get_request_throttler, - const ThrottlerPtr & put_request_throttler) + bool for_disk_s3) { - return PocoHTTPClientConfiguration( - force_region, - remote_host_filter, - s3_max_redirects, - enable_s3_requests_logging, - for_disk_s3, - get_request_throttler, - put_request_throttler); + return PocoHTTPClientConfiguration(force_region, remote_host_filter, s3_max_redirects, enable_s3_requests_logging, for_disk_s3); } URI::URI(const Poco::URI & uri_) diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 01a6b8d5d82..93e5eb78c7f 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -17,7 +17,6 @@ #include #include -#include namespace Aws::S3 { @@ -89,9 +88,7 @@ public: const RemoteHostFilter & remote_host_filter, unsigned int s3_max_redirects, bool enable_s3_requests_logging, - bool for_disk_s3, - const ThrottlerPtr & get_request_throttler, - const ThrottlerPtr & put_request_throttler); + bool for_disk_s3); private: ClientFactory(); diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 56d487f165b..9ed2c41fd01 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -71,7 +71,7 @@ WriteBufferFromS3::WriteBufferFromS3( std::shared_ptr client_ptr_, const String & bucket_, const String & key_, - const S3Settings::RequestSettings & request_settings_, + const S3Settings::ReadWriteSettings & s3_settings_, std::optional> object_metadata_, size_t buffer_size_, ThreadPoolCallbackRunner schedule_, @@ -79,10 +79,10 @@ WriteBufferFromS3::WriteBufferFromS3( : BufferWithOwnMemory(buffer_size_, nullptr, 0) , bucket(bucket_) , key(key_) - , request_settings(request_settings_) + , s3_settings(s3_settings_) , client_ptr(std::move(client_ptr_)) , 
object_metadata(std::move(object_metadata_)) - , upload_part_size(request_settings_.min_upload_part_size) + , upload_part_size(s3_settings_.min_upload_part_size) , schedule(std::move(schedule_)) , write_settings(write_settings_) { @@ -107,7 +107,7 @@ void WriteBufferFromS3::nextImpl() write_settings.remote_throttler->add(offset()); /// Data size exceeds singlepart upload threshold, need to use multipart upload. - if (multipart_upload_id.empty() && last_part_size > request_settings.max_single_part_upload_size) + if (multipart_upload_id.empty() && last_part_size > s3_settings.max_single_part_upload_size) createMultipartUpload(); if (!multipart_upload_id.empty() && last_part_size > upload_part_size) @@ -122,10 +122,10 @@ void WriteBufferFromS3::nextImpl() void WriteBufferFromS3::allocateBuffer() { - if (total_parts_uploaded != 0 && total_parts_uploaded % request_settings.upload_part_size_multiply_parts_count_threshold == 0) + if (total_parts_uploaded != 0 && total_parts_uploaded % s3_settings.upload_part_size_multiply_parts_count_threshold == 0) { - upload_part_size *= request_settings.upload_part_size_multiply_factor; - upload_part_size = std::min(upload_part_size, request_settings.max_upload_part_size); + upload_part_size *= s3_settings.upload_part_size_multiply_factor; + upload_part_size = std::min(upload_part_size, s3_settings.max_upload_part_size); } temporary_buffer = Aws::MakeShared("temporary buffer"); @@ -180,7 +180,7 @@ void WriteBufferFromS3::finalizeImpl() if (!multipart_upload_id.empty()) completeMultipartUpload(); - if (request_settings.check_objects_after_upload) + if (s3_settings.check_objects_after_upload) { LOG_TRACE(log, "Checking object {} exists after upload", key); @@ -370,7 +370,7 @@ void WriteBufferFromS3::completeMultipartUpload() req.SetMultipartUpload(multipart_upload); - size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + size_t max_retry = std::max(s3_settings.max_unexpected_write_error_retries, 1UL); for (size_t i = 0; i < max_retry; ++i) { ProfileEvents::increment(ProfileEvents::S3CompleteMultipartUpload); @@ -476,7 +476,7 @@ void WriteBufferFromS3::fillPutRequest(Aws::S3::Model::PutObjectRequest & req) void WriteBufferFromS3::processPutRequest(const PutObjectTask & task) { - size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + size_t max_retry = std::max(s3_settings.max_unexpected_write_error_retries, 1UL); for (size_t i = 0; i < max_retry; ++i) { ProfileEvents::increment(ProfileEvents::S3PutObject); diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index b4b5a6d37a3..28f831856d7 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -50,7 +50,7 @@ public: std::shared_ptr client_ptr_, const String & bucket_, const String & key_, - const S3Settings::RequestSettings & request_settings_, + const S3Settings::ReadWriteSettings & s3_settings_, std::optional> object_metadata_ = std::nullopt, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, ThreadPoolCallbackRunner schedule_ = {}, @@ -88,7 +88,7 @@ private: const String bucket; const String key; - const S3Settings::RequestSettings request_settings; + const S3Settings::ReadWriteSettings s3_settings; const std::shared_ptr client_ptr; const std::optional> object_metadata; diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h index a8c27e3d1d4..5736336983a 100644 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ 
b/src/Storages/ExternalDataSourceConfiguration.h @@ -118,7 +118,7 @@ struct URLBasedDataSourceConfiguration struct StorageS3Configuration : URLBasedDataSourceConfiguration { S3::AuthSettings auth_settings; - S3Settings::RequestSettings request_settings; + S3Settings::ReadWriteSettings rw_settings; }; diff --git a/src/Storages/StorageDelta.cpp b/src/Storages/StorageDelta.cpp index c077b24c610..e8287a2fd61 100644 --- a/src/Storages/StorageDelta.cpp +++ b/src/Storages/StorageDelta.cpp @@ -151,14 +151,12 @@ std::vector JsonMetadataGetter::getJsonLogFiles() std::shared_ptr JsonMetadataGetter::createS3ReadBuffer(const String & key, ContextPtr context) { /// TODO: add parallel downloads - S3Settings::RequestSettings request_settings; - request_settings.max_single_read_retries = 10; return std::make_shared( base_configuration.client, base_configuration.uri.bucket, key, base_configuration.uri.version_id, - request_settings, + /* max single read retries */10, context->getReadSettings()); } @@ -189,7 +187,7 @@ StorageDelta::StorageDelta( ContextPtr context_, std::optional format_settings_) : IStorage(table_id_) - , base_configuration{configuration_.url, configuration_.auth_settings, configuration_.request_settings, configuration_.headers} + , base_configuration{configuration_.url, configuration_.auth_settings, configuration_.rw_settings, configuration_.headers} , log(&Poco::Logger::get("StorageDeltaLake (" + table_id_.table_name + ")")) , table_path(base_configuration.uri.key) { diff --git a/src/Storages/StorageHudi.cpp b/src/Storages/StorageHudi.cpp index 4b20e4cbd22..121856c4a57 100644 --- a/src/Storages/StorageHudi.cpp +++ b/src/Storages/StorageHudi.cpp @@ -37,7 +37,7 @@ StorageHudi::StorageHudi( ContextPtr context_, std::optional format_settings_) : IStorage(table_id_) - , base_configuration{configuration_.url, configuration_.auth_settings, configuration_.request_settings, configuration_.headers} + , base_configuration{configuration_.url, configuration_.auth_settings, configuration_.rw_settings, configuration_.headers} , log(&Poco::Logger::get("StorageHudi (" + table_id_.table_name + ")")) , table_path(base_configuration.uri.key) { diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 7b575d0d12f..ab9b71f5ff3 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -100,8 +100,7 @@ public: const Block & virtual_header_, ContextPtr context_, std::unordered_map * object_infos_, - Strings * read_keys_, - const S3Settings::RequestSettings & request_settings_) + Strings * read_keys_) : WithContext(context_) , client(client_) , globbed_uri(globbed_uri_) @@ -109,7 +108,6 @@ public: , virtual_header(virtual_header_) , object_infos(object_infos_) , read_keys(read_keys_) - , request_settings(request_settings_) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) throw Exception("Expression can not have wildcards inside bucket name", ErrorCodes::UNEXPECTED_EXPRESSION); @@ -260,7 +258,6 @@ private: bool is_finished{false}; std::unordered_map * object_infos; Strings * read_keys; - S3Settings::RequestSettings request_settings; }; StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( @@ -270,9 +267,8 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( const Block & virtual_header, ContextPtr context, std::unordered_map * object_infos_, - Strings * read_keys_, - const S3Settings::RequestSettings & request_settings_) - : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, object_infos_, read_keys_, 
request_settings_)) + Strings * read_keys_) + : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, object_infos_, read_keys_)) { } @@ -385,7 +381,7 @@ StorageS3Source::StorageS3Source( std::optional format_settings_, const ColumnsDescription & columns_, UInt64 max_block_size_, - const S3Settings::RequestSettings & request_settings_, + UInt64 max_single_read_retries_, String compression_hint_, const std::shared_ptr & client_, const String & bucket_, @@ -401,7 +397,7 @@ StorageS3Source::StorageS3Source( , format(format_) , columns_desc(columns_) , max_block_size(max_block_size_) - , request_settings(request_settings_) + , max_single_read_retries(max_single_read_retries_) , compression_hint(std::move(compression_hint_)) , client(client_) , sample_block(sample_block_) @@ -467,7 +463,7 @@ std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & k if (!use_parallel_download || object_too_small) { LOG_TRACE(log, "Downloading object of size {} from S3 in single thread", object_size); - return std::make_unique(client, bucket, key, version_id, request_settings, getContext()->getReadSettings()); + return std::make_unique(client, bucket, key, version_id, max_single_read_retries, getContext()->getReadSettings()); } assert(object_size > 0); @@ -479,7 +475,7 @@ std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & k } auto factory = std::make_unique( - client, bucket, key, version_id, download_buffer_size, object_size, request_settings, getContext()->getReadSettings()); + client, bucket, key, version_id, download_buffer_size, object_size, max_single_read_retries, getContext()->getReadSettings()); LOG_TRACE( log, "Downloading from S3 in {} threads. Object size: {}, Range size: {}.", download_thread_num, object_size, download_buffer_size); @@ -589,7 +585,7 @@ public: s3_configuration_.client, bucket, key, - s3_configuration_.request_settings, + s3_configuration_.rw_settings, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, threadPoolCallbackRunner(IOThreadPool::get(), "S3ParallelRead"), @@ -753,7 +749,7 @@ StorageS3::StorageS3( bool distributed_processing_, ASTPtr partition_by_) : IStorage(table_id_) - , s3_configuration{configuration_.url, configuration_.auth_settings, configuration_.request_settings, configuration_.headers} + , s3_configuration{configuration_.url, configuration_.auth_settings, configuration_.rw_settings, configuration_.headers} , keys({s3_configuration.uri.key}) , format_name(configuration_.format) , compression_method(configuration_.compression_method) @@ -819,7 +815,7 @@ std::shared_ptr StorageS3::createFileIterator( { /// Iterate through disclosed globs and make a source for each file auto glob_iterator = std::make_shared( - *s3_configuration.client, s3_configuration.uri, query, virtual_block, local_context, object_infos, read_keys, s3_configuration.request_settings); + *s3_configuration.client, s3_configuration.uri, query, virtual_block, local_context, object_infos, read_keys); return std::make_shared([glob_iterator]() { return glob_iterator->next(); }); } else @@ -909,7 +905,7 @@ Pipe StorageS3::read( format_settings, columns_description, max_block_size, - s3_configuration.request_settings, + s3_configuration.rw_settings.max_single_read_retries, compression_method, s3_configuration.client, s3_configuration.uri.bucket, @@ -1026,10 +1022,12 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration & upd) { auto settings = 
ctx->getStorageS3Settings().getSettings(upd.uri.uri.toString()); - if (upd.request_settings != settings.request_settings) - upd.request_settings = settings.request_settings; + const auto & config_rw_settings = settings.rw_settings; - upd.request_settings.updateFromSettingsIfEmpty(ctx->getSettings()); + if (upd.rw_settings != config_rw_settings) + upd.rw_settings = settings.rw_settings; + + upd.rw_settings.updateFromSettingsIfEmpty(ctx->getSettings()); if (upd.client) { @@ -1047,12 +1045,10 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration ctx->getRemoteHostFilter(), static_cast(ctx->getGlobalContext()->getSettingsRef().s3_max_redirects), ctx->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, - /* for_disk_s3 = */ false, - upd.request_settings.get_request_throttler, - upd.request_settings.put_request_throttler); + /* for_disk_s3 = */ false); client_configuration.endpointOverride = upd.uri.endpoint; - client_configuration.maxConnections = static_cast(upd.request_settings.max_connections); + client_configuration.maxConnections = static_cast(upd.rw_settings.max_connections); auto credentials = Aws::Auth::AWSCredentials(upd.auth_settings.access_key_id, upd.auth_settings.secret_access_key); auto headers = upd.auth_settings.headers; @@ -1084,17 +1080,17 @@ void StorageS3::processNamedCollectionResult(StorageS3Configuration & configurat else if (arg_name == "use_environment_credentials") configuration.auth_settings.use_environment_credentials = checkAndGetLiteralArgument(arg_value, "use_environment_credentials"); else if (arg_name == "max_single_read_retries") - configuration.request_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_single_read_retries"); + configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_single_read_retries"); else if (arg_name == "min_upload_part_size") - configuration.request_settings.min_upload_part_size = checkAndGetLiteralArgument(arg_value, "min_upload_part_size"); + configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "min_upload_part_size"); else if (arg_name == "upload_part_size_multiply_factor") - configuration.request_settings.upload_part_size_multiply_factor = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_factor"); + configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_factor"); else if (arg_name == "upload_part_size_multiply_parts_count_threshold") - configuration.request_settings.upload_part_size_multiply_parts_count_threshold = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_parts_count_threshold"); + configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_parts_count_threshold"); else if (arg_name == "max_single_part_upload_size") - configuration.request_settings.max_single_part_upload_size = checkAndGetLiteralArgument(arg_value, "max_single_part_upload_size"); + configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_single_part_upload_size"); else if (arg_name == "max_connections") - configuration.request_settings.max_connections = checkAndGetLiteralArgument(arg_value, "max_connections"); + configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_connections"); else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Unknown key-value argument `{}` for 
StorageS3, expected: url, [access_key_id, secret_access_key], name of used format and [compression_method].", @@ -1170,7 +1166,7 @@ ColumnsDescription StorageS3::getTableStructureFromData( S3Configuration s3_configuration{ configuration.url, configuration.auth_settings, - S3Settings::RequestSettings(ctx->getSettingsRef()), + S3Settings::ReadWriteSettings(ctx->getSettingsRef()), configuration.headers}; updateS3Configuration(ctx, s3_configuration); @@ -1232,7 +1228,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( int zstd_window_log_max = static_cast(ctx->getSettingsRef().zstd_window_log_max); return wrapReadBufferWithCompressionMethod( std::make_unique( - s3_configuration.client, s3_configuration.uri.bucket, key, s3_configuration.uri.version_id, s3_configuration.request_settings, ctx->getReadSettings()), + s3_configuration.client, s3_configuration.uri.bucket, key, s3_configuration.uri.version_id, s3_configuration.rw_settings.max_single_read_retries, ctx->getReadSettings()), chooseCompressionMethod(key, compression_method), zstd_window_log_max); }; diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 47ac26abccb..81bbe2c86ae 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -43,8 +43,7 @@ public: const Block & virtual_header, ContextPtr context, std::unordered_map * object_infos = nullptr, - Strings * read_keys_ = nullptr, - const S3Settings::RequestSettings & request_settings_ = {}); + Strings * read_keys_ = nullptr); String next(); @@ -80,7 +79,7 @@ public: std::optional format_settings_, const ColumnsDescription & columns_, UInt64 max_block_size_, - const S3Settings::RequestSettings & request_settings_, + UInt64 max_single_read_retries_, String compression_hint_, const std::shared_ptr & client_, const String & bucket, @@ -103,7 +102,7 @@ private: String format; ColumnsDescription columns_desc; UInt64 max_block_size; - S3Settings::RequestSettings request_settings; + UInt64 max_single_read_retries; String compression_hint; std::shared_ptr client; Block sample_block; @@ -187,7 +186,7 @@ public: std::shared_ptr client; S3::AuthSettings auth_settings; - S3Settings::RequestSettings request_settings; + S3Settings::ReadWriteSettings rw_settings; /// If s3 configuration was passed from ast, then it is static. /// If from config - it can be changed with config reload. 
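Note (illustrative, not part of the patch): the `ReadWriteSettings` restored by this revert are filled from query-level settings via `updateFromSettingsIfEmpty(ctx->getSettings())` above, so the same knobs can be tuned per query. A minimal usage sketch follows, mirroring the pattern of the deleted throttler test; the endpoint, credentials and values are placeholders.

-- Hypothetical per-query overrides that end up in ReadWriteSettings (endpoint/credentials are placeholders):
INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/data.csv', 'test', 'testtest', 'CSV', 'number UInt64')
SELECT number FROM numbers(1000000)
SETTINGS s3_min_upload_part_size = 10485760, s3_max_single_part_upload_size = 33554432, s3_truncate_on_insert = 1;

SELECT count() FROM s3('http://localhost:11111/test/data.csv', 'test', 'testtest', 'CSV', 'number UInt64')
SETTINGS s3_max_single_read_retries = 4, s3_max_connections = 1024;
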
@@ -199,11 +198,11 @@ public: S3Configuration( const String & url_, const S3::AuthSettings & auth_settings_, - const S3Settings::RequestSettings & request_settings_, + const S3Settings::ReadWriteSettings & rw_settings_, const HeaderCollection & headers_from_ast_) : uri(S3::URI(url_)) , auth_settings(auth_settings_) - , request_settings(request_settings_) + , rw_settings(rw_settings_) , static_configuration(!auth_settings_.access_key_id.empty()) , headers_from_ast(headers_from_ast_) {} }; diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index ec970654b6e..350e942f972 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -46,7 +46,7 @@ StorageS3Cluster::StorageS3Cluster( const ConstraintsDescription & constraints_, ContextPtr context_) : IStorage(table_id_) - , s3_configuration{configuration_.url, configuration_.auth_settings, configuration_.request_settings, configuration_.headers} + , s3_configuration{configuration_.url, configuration_.auth_settings, configuration_.rw_settings, configuration_.headers} , filename(configuration_.url) , cluster_name(configuration_.cluster_name) , format_name(configuration_.format) diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 962f7c113bd..68e15d10f52 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -58,26 +57,18 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U auto auth_settings = S3::AuthSettings::loadFromConfig(config_elem + "." + key, config); - S3Settings::RequestSettings request_settings; - request_settings.max_single_read_retries = get_uint_for_key(key, "max_single_read_retries", true, settings.s3_max_single_read_retries); - request_settings.min_upload_part_size = get_uint_for_key(key, "min_upload_part_size", true, settings.s3_min_upload_part_size); - request_settings.max_upload_part_size = get_uint_for_key(key, "max_upload_part_size", true, DEFAULT_MAX_UPLOAD_PART_SIZE); - request_settings.upload_part_size_multiply_factor = get_uint_for_key(key, "upload_part_size_multiply_factor", true, settings.s3_upload_part_size_multiply_factor); - request_settings.upload_part_size_multiply_parts_count_threshold = get_uint_for_key(key, "upload_part_size_multiply_parts_count_threshold", true, settings.s3_upload_part_size_multiply_parts_count_threshold); - request_settings.max_single_part_upload_size = get_uint_for_key(key, "max_single_part_upload_size", true, settings.s3_max_single_part_upload_size); - request_settings.max_single_operation_copy_size = get_uint_for_key(key, "max_single_operation_copy_size", true, DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE); - request_settings.max_connections = get_uint_for_key(key, "max_connections", true, settings.s3_max_connections); - request_settings.check_objects_after_upload = get_bool_for_key(key, "check_objects_after_upload", true, false); + S3Settings::ReadWriteSettings rw_settings; + rw_settings.max_single_read_retries = get_uint_for_key(key, "max_single_read_retries", true, settings.s3_max_single_read_retries); + rw_settings.min_upload_part_size = get_uint_for_key(key, "min_upload_part_size", true, settings.s3_min_upload_part_size); + rw_settings.max_upload_part_size = get_uint_for_key(key, "max_upload_part_size", true, DEFAULT_MAX_UPLOAD_PART_SIZE); + rw_settings.upload_part_size_multiply_factor = get_uint_for_key(key, "upload_part_size_multiply_factor", true, 
settings.s3_upload_part_size_multiply_factor); + rw_settings.upload_part_size_multiply_parts_count_threshold = get_uint_for_key(key, "upload_part_size_multiply_parts_count_threshold", true, settings.s3_upload_part_size_multiply_parts_count_threshold); + rw_settings.max_single_part_upload_size = get_uint_for_key(key, "max_single_part_upload_size", true, settings.s3_max_single_part_upload_size); + rw_settings.max_single_operation_copy_size = get_uint_for_key(key, "max_single_operation_copy_size", true, DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE); + rw_settings.max_connections = get_uint_for_key(key, "max_connections", true, settings.s3_max_connections); + rw_settings.check_objects_after_upload = get_bool_for_key(key, "check_objects_after_upload", true, false); - // NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. - if (UInt64 max_get_rps = get_uint_for_key(key, "max_get_rps", true, settings.s3_max_get_rps)) - request_settings.get_request_throttler = std::make_shared( - max_get_rps, get_uint_for_key(key, "max_get_burst", true, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * max_get_rps)); - if (UInt64 max_put_rps = get_uint_for_key(key, "max_put_rps", true, settings.s3_max_put_rps)) - request_settings.put_request_throttler = std::make_shared( - max_put_rps, get_uint_for_key(key, "max_put_burst", true, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * max_put_rps)); - - s3_settings.emplace(endpoint, S3Settings{std::move(auth_settings), std::move(request_settings)}); + s3_settings.emplace(endpoint, S3Settings{std::move(auth_settings), std::move(rw_settings)}); } } } @@ -98,7 +89,7 @@ S3Settings StorageS3Settings::getSettings(const String & endpoint) const return {}; } -S3Settings::RequestSettings::RequestSettings(const Settings & settings) +S3Settings::ReadWriteSettings::ReadWriteSettings(const Settings & settings) { max_single_read_retries = settings.s3_max_single_read_retries; min_upload_part_size = settings.s3_min_upload_part_size; @@ -108,15 +99,9 @@ S3Settings::RequestSettings::RequestSettings(const Settings & settings) max_connections = settings.s3_max_connections; check_objects_after_upload = settings.s3_check_objects_after_upload; max_unexpected_write_error_retries = settings.s3_max_unexpected_write_error_retries; - if (settings.s3_max_get_rps) - get_request_throttler = std::make_shared( - settings.s3_max_get_rps, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * settings.s3_max_get_rps); - if (settings.s3_max_put_rps) - put_request_throttler = std::make_shared( - settings.s3_max_put_rps, settings.s3_max_put_burst ? 
settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps); } -void S3Settings::RequestSettings::updateFromSettingsIfEmpty(const Settings & settings) +void S3Settings::ReadWriteSettings::updateFromSettingsIfEmpty(const Settings & settings) { if (!max_single_read_retries) max_single_read_retries = settings.s3_max_single_read_retries; @@ -137,12 +122,6 @@ void S3Settings::RequestSettings::updateFromSettingsIfEmpty(const Settings & set if (!max_unexpected_write_error_retries) max_unexpected_write_error_retries = settings.s3_max_unexpected_write_error_retries; check_objects_after_upload = settings.s3_check_objects_after_upload; - if (!get_request_throttler && settings.s3_max_get_rps) - get_request_throttler = std::make_shared( - settings.s3_max_get_rps, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * settings.s3_max_get_rps); - if (!put_request_throttler && settings.s3_max_put_rps) - put_request_throttler = std::make_shared( - settings.s3_max_put_rps, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps); } } diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 955cd2d025b..bd90ba569d8 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -24,7 +23,7 @@ struct Settings; struct S3Settings { - struct RequestSettings + struct ReadWriteSettings { size_t max_single_read_retries = 0; size_t min_upload_part_size = 0; @@ -36,13 +35,11 @@ struct S3Settings size_t max_connections = 0; bool check_objects_after_upload = false; size_t max_unexpected_write_error_retries = 0; - ThrottlerPtr get_request_throttler; - ThrottlerPtr put_request_throttler; - RequestSettings() = default; - explicit RequestSettings(const Settings & settings); + ReadWriteSettings() = default; + explicit ReadWriteSettings(const Settings & settings); - inline bool operator==(const RequestSettings & other) const + inline bool operator==(const ReadWriteSettings & other) const { return max_single_read_retries == other.max_single_read_retries && min_upload_part_size == other.min_upload_part_size @@ -53,20 +50,18 @@ struct S3Settings && max_single_operation_copy_size == other.max_single_operation_copy_size && max_connections == other.max_connections && check_objects_after_upload == other.check_objects_after_upload - && max_unexpected_write_error_retries == other.max_unexpected_write_error_retries - && get_request_throttler == other.get_request_throttler - && put_request_throttler == other.put_request_throttler; + && max_unexpected_write_error_retries == other.max_unexpected_write_error_retries; } void updateFromSettingsIfEmpty(const Settings & settings); }; S3::AuthSettings auth_settings; - RequestSettings request_settings; + ReadWriteSettings rw_settings; inline bool operator==(const S3Settings & other) const { - return auth_settings == other.auth_settings && request_settings == other.request_settings; + return auth_settings == other.auth_settings && rw_settings == other.rw_settings; } }; diff --git a/tests/queries/0_stateless/02477_s3_request_throttler.reference b/tests/queries/0_stateless/02477_s3_request_throttler.reference deleted file mode 100644 index 9315e86b328..00000000000 --- a/tests/queries/0_stateless/02477_s3_request_throttler.reference +++ /dev/null @@ -1,2 +0,0 @@ -1 -1 1 1 diff --git a/tests/queries/0_stateless/02477_s3_request_throttler.sql 
b/tests/queries/0_stateless/02477_s3_request_throttler.sql deleted file mode 100644 index 25653b1bab0..00000000000 --- a/tests/queries/0_stateless/02477_s3_request_throttler.sql +++ /dev/null @@ -1,26 +0,0 @@ --- Tags: no-fasttest --- Tag no-fasttest: needs s3 - --- Limit S3 PUT request per second rate -SET s3_max_put_rps = 2; -SET s3_max_put_burst = 1; - -CREATE TEMPORARY TABLE times (t DateTime); - --- INSERT query requires 3 PUT requests and 1/rps = 0.5 second in between, the first query is not throttled due to burst -INSERT INTO times SELECT now(); -INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/request-throttler.csv', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10000, s3_truncate_on_insert = 1; -INSERT INTO times SELECT now(); - -SELECT max(t) - min(t) >= 1 FROM times; - -SYSTEM FLUSH LOGS; -SELECT ProfileEvents['S3CreateMultipartUpload'] == 1, - ProfileEvents['S3UploadPart'] == 1, - ProfileEvents['S3CompleteMultipartUpload'] == 1 -FROM system.query_log -WHERE query LIKE '%request-throttler.csv%' -AND type = 'QueryFinish' -AND current_database = currentDatabase() -ORDER BY query_start_time DESC -LIMIT 1; From 93dde0b3f922efdb9882eb7c9f2b50eda693a1a3 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 16 Nov 2022 16:18:20 -0500 Subject: [PATCH 74/80] Update docs for Date32 --- docs/en/sql-reference/data-types/date32.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/date32.md b/docs/en/sql-reference/data-types/date32.md index c8c7470d2cb..db41fdf2fc3 100644 --- a/docs/en/sql-reference/data-types/date32.md +++ b/docs/en/sql-reference/data-types/date32.md @@ -6,7 +6,7 @@ sidebar_label: Date32 # Date32 -A date. Supports the date range same with [DateTime64](../../sql-reference/data-types/datetime64.md). Stored in four bytes as the number of days since 1900-01-01. Allows storing values till 2299-12-31. +A date. Supports the date range same with [DateTime64](../../sql-reference/data-types/datetime64.md). Stored as a signed 32-bit integer in native byte order with the value representing the days since 1970-01-01 (0 represents 1970-01-01 and negative values represent the days before 1970). **Examples** From f4eb430caad8d5e5410b29ab0d5267e0048e8961 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 17 Nov 2022 03:14:22 +0100 Subject: [PATCH 75/80] Revert "remove changes" This reverts commit 4016fd2efcb196a97bfc157f661acf6c165f1fa8. --- CHANGELOG.md | 109 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 106 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68767612892..051bde44dd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v22.11, 2022-11-17](#2211)**
**[ClickHouse release v22.10, 2022-10-25](#2210)**
**[ClickHouse release v22.9, 2022-09-22](#229)**
**[ClickHouse release v22.8-lts, 2022-08-18](#228)**
@@ -11,6 +12,108 @@ **[ClickHouse release v22.1, 2022-01-18](#221)**
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**
+### ClickHouse release 22.11, 2022-11-17 + +#### Backward Incompatible Change +* `JSONExtract` family of functions will now attempt to coerce to the requested type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)). + +#### New Feature +* Adds support for retries during INSERTs into ReplicatedMergeTree when a session with ClickHouse Keeper is lost. Apart from fault tolerance, it aims to provide better user experience, - avoid returning a user an error during insert if keeper is restarted (for example, due to upgrade). [#42607](https://github.com/ClickHouse/ClickHouse/pull/42607) ([Igor Nikonov](https://github.com/devcrafter)). +* Add `Hudi` and `DeltaLake` table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do), [Kseniia Sumarokova](https://github.com/kssenii)). +* Add table function `hudi` and `deltaLake`. [#43080](https://github.com/ClickHouse/ClickHouse/pull/43080) ([flynn](https://github.com/ucasfl)). +* Support for composite time intervals. 1. Add, subtract and negate operations are now available on Intervals. In the case where the types of Intervals are different, they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added `**` glob support for recursive directory traversal of the filesystem and S3. Resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Introduce `s3_plain` disk type for write-once-read-many operations. Implement `ATTACH` of `MergeTree` table for `s3_plain` disk. [#42628](https://github.com/ClickHouse/ClickHouse/pull/42628) ([Azat Khuzhin](https://github.com/azat)). +* Added applied row-level policies to `system.query_log`. [#39819](https://github.com/ClickHouse/ClickHouse/pull/39819) ([Vladimir Chebotaryov](https://github.com/quickhouse)). +* Add four-letter command `csnp` for manually creating snapshots in ClickHouse Keeper. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)). +* Add function `ascii` like in Apache Spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)). +* Add function `pmod` which returns non-negative result based on modulo. [#42755](https://github.com/ClickHouse/ClickHouse/pull/42755) ([李扬](https://github.com/taiyang-li)). +* Add function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)). +* Add function `randCanonical`, which is similar to the `rand` function in Apache Spark or Impala. The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). [#43124](https://github.com/ClickHouse/ClickHouse/pull/43124) ([李扬](https://github.com/taiyang-li)). 
+* Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)). +* Add `min_age_to_force_merge_on_partition_only` setting to optimize old parts for the entire partition only. [#42659](https://github.com/ClickHouse/ClickHouse/pull/42659) ([Antonio Andelic](https://github.com/antonio2368)). +* Add generic implementation for arbitrary structured named collections, access type and `system.named_collections`. [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Performance Improvement +* `match` function can use the index if it's a condition on string prefix. This closes [#37333](https://github.com/ClickHouse/ClickHouse/issues/37333). [#42458](https://github.com/ClickHouse/ClickHouse/pull/42458) ([clarkcaoliu](https://github.com/Clark0)). +* Speed up AND and OR operators when they are sequenced. [#42214](https://github.com/ClickHouse/ClickHouse/pull/42214) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Support parallel parsing for `LineAsString` input format. This improves performance just slightly. This closes [#42502](https://github.com/ClickHouse/ClickHouse/issues/42502). [#42780](https://github.com/ClickHouse/ClickHouse/pull/42780) ([Kruglov Pavel](https://github.com/Avogar)). +* ClickHouse Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)). +* A condition like `NOT LIKE 'prefix%'` can use the primary index. [#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)). + +#### Experimental Feature +* Support type `Object` inside other types, e.g. `Array(JSON)`. [#36969](https://github.com/ClickHouse/ClickHouse/pull/36969) ([Anton Popov](https://github.com/CurtizJ)). +* Ignore MySQL binlog SAVEPOINT event for MaterializedMySQL. [#42931](https://github.com/ClickHouse/ClickHouse/pull/42931) ([zzsmdfj](https://github.com/zzsmdfj)). Handle (ignore) SAVEPOINT queries in MaterializedMySQL. [#43086](https://github.com/ClickHouse/ClickHouse/pull/43086) ([Stig Bakken](https://github.com/stigsb)). + +#### Improvement +* Trivial queries with small LIMIT will properly determine the number of estimated rows to read, so that the threshold will be checked properly. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)). +* Add support for interactive parameters in INSERT VALUES queries. [#43077](https://github.com/ClickHouse/ClickHouse/pull/43077) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added new field `allow_readonly` in `system.table_functions` to allow using table functions in readonly mode. Resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. 
[#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* The `system.asynchronous_metrics` gets embedded documentation. This documentation is also exported to Prometheus. Fixed an error with the metrics about `cache` disks - they were calculated only for one arbitrary cache disk instead all of them. This closes [#7644](https://github.com/ClickHouse/ClickHouse/issues/7644). [#43194](https://github.com/ClickHouse/ClickHouse/pull/43194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Throttling algorithm changed to token bucket. [#42665](https://github.com/ClickHouse/ClickHouse/pull/42665) ([Sergei Trifonov](https://github.com/serxa)). +* Mask passwords and secret keys both in `system.query_log` and `/var/log/clickhouse-server/*.log` and also in error messages. [#42484](https://github.com/ClickHouse/ClickHouse/pull/42484) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove covered parts for fetched part (to avoid possible replication delay grows). [#39737](https://github.com/ClickHouse/ClickHouse/pull/39737) ([Azat Khuzhin](https://github.com/azat)). +* If `/dev/tty` is available, the progress in clickhouse-client and clickhouse-local will be rendered directly to the terminal, without writing to STDERR. It allows getting progress even if STDERR is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for `FixedString` input to base64 coding functions. [#42285](https://github.com/ClickHouse/ClickHouse/pull/42285) ([ltrk2](https://github.com/ltrk2)). +* Add columns `bytes_on_disk` and `path` to `system.detached_parts`. Closes [#42264](https://github.com/ClickHouse/ClickHouse/issues/42264). [#42303](https://github.com/ClickHouse/ClickHouse/pull/42303) ([chen](https://github.com/xiedeyantu)). +* Improve using structure from insertion table in table functions, now setting `use_structure_from_insertion_table_in_table_functions` has new possible value - `2` that means that ClickHouse will try to determine if we can use structure from insertion table or not automatically. Closes [#40028](https://github.com/ClickHouse/ClickHouse/issues/40028). [#42320](https://github.com/ClickHouse/ClickHouse/pull/42320) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix no progress indication on INSERT FROM INFILE. Closes [#42548](https://github.com/ClickHouse/ClickHouse/issues/42548). [#42634](https://github.com/ClickHouse/ClickHouse/pull/42634) ([chen](https://github.com/xiedeyantu)). +* Refactor function `tokens` to enable max tokens returned for related functions (disabled by default). [#42673](https://github.com/ClickHouse/ClickHouse/pull/42673) ([李扬](https://github.com/taiyang-li)). +* Allow to use `Date32` arguments for `formatDateTime` and `FROM_UNIXTIME` functions. [#42737](https://github.com/ClickHouse/ClickHouse/pull/42737) ([Roman Vasin](https://github.com/rvasin)). +* Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. 
[#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `FailedAsyncInsertQuery` event metric for async inserts. [#42814](https://github.com/ClickHouse/ClickHouse/pull/42814) ([Krzysztof Góralski](https://github.com/kgoralski)). +* Implement `read-in-order` optimization on top of query plan. It is enabled by default. Set `query_plan_read_in_order = 0` to use previous AST-based version. [#42829](https://github.com/ClickHouse/ClickHouse/pull/42829) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Increase the size of upload part exponentially for backup to S3 to avoid errors about max 10 000 parts limit of the multipart upload to s3. [#42833](https://github.com/ClickHouse/ClickHouse/pull/42833) ([Vitaly Baranov](https://github.com/vitlibar)). +* When the merge task is continuously busy and the disk space is insufficient, the completely expired parts cannot be selected and dropped, resulting in insufficient disk space. My idea is that when the entire Part expires, there is no need for additional disk space to guarantee, ensure the normal execution of TTL. [#42869](https://github.com/ClickHouse/ClickHouse/pull/42869) ([zhongyuankai](https://github.com/zhongyuankai)). +* Add `oss` function and `OSS` table engine (this is convenient for users). oss is fully compatible with s3. [#43155](https://github.com/ClickHouse/ClickHouse/pull/43155) ([zzsmdfj](https://github.com/zzsmdfj)). +* Improve error reporting in the collection of OS-related info for the `system.asynchronous_metrics` table. [#43192](https://github.com/ClickHouse/ClickHouse/pull/43192) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Modify the `INFORMATION_SCHEMA` tables in a way so that ClickHouse can connect to itself using the MySQL compatibility protocol. Add columns instead of aliases (related to [#9769](https://github.com/ClickHouse/ClickHouse/issues/9769)). It will improve the compatibility with various MySQL clients. [#43198](https://github.com/ClickHouse/ClickHouse/pull/43198) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Add some functions for compatibility with PowerBI, when it connects using MySQL protocol [#42612](https://github.com/ClickHouse/ClickHouse/pull/42612) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Better usability for Dashboard on changes [#42872](https://github.com/ClickHouse/ClickHouse/pull/42872) ([Vladimir C](https://github.com/vdimir)). + +#### Build/Testing/Packaging Improvement +* Run SQLancer for each pull request and commit to master. [SQLancer](https://github.com/sqlancer/sqlancer) is an OpenSource fuzzer that focuses on automatic detection of logical bugs. [#42397](https://github.com/ClickHouse/ClickHouse/pull/42397) ([Ilya Yatsishin](https://github.com/qoega)). +* Update to latest zlib-ng. [#42463](https://github.com/ClickHouse/ClickHouse/pull/42463) ([Boris Kuschel](https://github.com/bkuschel)). +* Add support for testing ClickHouse server with Jepsen. By the way, we already have support for testing ClickHouse Keeper with Jepsen. This pull request extends it to Replicated tables. [#42619](https://github.com/ClickHouse/ClickHouse/pull/42619) ([Antonio Andelic](https://github.com/antonio2368)). +* Use https://github.com/matus-chochlik/ctcache for clang-tidy results caching. [#42913](https://github.com/ClickHouse/ClickHouse/pull/42913) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. 
The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove some libraries from Ubuntu Docker image. [#42622](https://github.com/ClickHouse/ClickHouse/pull/42622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Updated normaliser to clone the alias ast. Resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix race for backup of tables in `Lazy` databases. [#43104](https://github.com/ClickHouse/ClickHouse/pull/43104) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix for `skip_unavailable_shards`: it did not work with the `s3Cluster` table function. [#43131](https://github.com/ClickHouse/ClickHouse/pull/43131) ([chen](https://github.com/xiedeyantu)). +* Fix schema inference in `s3Cluster` and improvement in `hdfsCluster`. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix retries while reading from URL table engines / table function. (retriable errors could be retries more times than needed, non-retriable errors resulted in failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)). +* A segmentation fault related to DNS & c-ares has been reported and fixed. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix `LOGICAL_ERROR` `Arguments of 'plus' have incorrect data types` which may happen in PK analysis (monotonicity check). Fix invalid PK analysis for monotonic binary functions with first constant argument. [#42410](https://github.com/ClickHouse/ClickHouse/pull/42410) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect key analysis when key types cannot be inside Nullable. This fixes [#42456](https://github.com/ClickHouse/ClickHouse/issues/42456). [#42469](https://github.com/ClickHouse/ClickHouse/pull/42469) ([Amos Bird](https://github.com/amosbird)). +* Fix typo in a setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). [#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix creating a Set with wrong header when data type is LowCardinality. Closes [#42460](https://github.com/ClickHouse/ClickHouse/issues/42460). [#42579](https://github.com/ClickHouse/ClickHouse/pull/42579) ([flynn](https://github.com/ucasfl)). +* `(U)Int128` and `(U)Int256` values were correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix a bug in functions parser that could have led to a segmentation fault. 
[#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix the locking in `truncate table`. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)). +* Fix possible crash in `web` disks when file does not exist (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). +* Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)). +* Fix stack-use-after-return under ASAN build in the Create User query parser. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix `lowerUTF8`/`upperUTF8` in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). +* Additional bound check was added to LZ4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)). +* Fix rare possible hang on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect behavior with multiple disjuncts in hash join, close [#42832](https://github.com/ClickHouse/ClickHouse/issues/42832). [#42876](https://github.com/ClickHouse/ClickHouse/pull/42876) ([Vladimir C](https://github.com/vdimir)). +* A null pointer will be generated when select if as from ‘three table join’ , For example, this SQL query: [#42883](https://github.com/ClickHouse/ClickHouse/pull/42883) ([zzsmdfj](https://github.com/zzsmdfj)). +* Fix memory sanitizer report in Cluster Discovery, close [#42763](https://github.com/ClickHouse/ClickHouse/issues/42763). [#42905](https://github.com/ClickHouse/ClickHouse/pull/42905) ([Vladimir C](https://github.com/vdimir)). +* Improve DateTime schema inference in case of empty string. [#42911](https://github.com/ClickHouse/ClickHouse/pull/42911) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). +* Fix ATTACH TABLE in `PostgreSQL` database engine if the table contains DATETIME data type. Closes [#42817](https://github.com/ClickHouse/ClickHouse/issues/42817). [#42960](https://github.com/ClickHouse/ClickHouse/pull/42960) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix lambda parsing. Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix incorrect key analysis when nullable keys appear in the middle of a hyperrectangle. This fixes [#43111](https://github.com/ClickHouse/ClickHouse/issues/43111) . [#43133](https://github.com/ClickHouse/ClickHouse/pull/43133) ([Amos Bird](https://github.com/amosbird)). 
+* Fix several buffer over-reads in deserialization of carefully crafted aggregate function states. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). +* Fix function `if` in case of NULL and const Nullable arguments. Closes [#43069](https://github.com/ClickHouse/ClickHouse/issues/43069). [#43178](https://github.com/ClickHouse/ClickHouse/pull/43178) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix decimal math overflow in parsing DateTime with the 'best effort' algorithm. Closes [#43061](https://github.com/ClickHouse/ClickHouse/issues/43061). [#43180](https://github.com/ClickHouse/ClickHouse/pull/43180) ([Kruglov Pavel](https://github.com/Avogar)). +* The `indent` field produced by the `git-import` tool was miscalculated. See https://clickhouse.com/docs/en/getting-started/example-datasets/github/. [#43191](https://github.com/ClickHouse/ClickHouse/pull/43191) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed unexpected behaviour of `Interval` types with subquery and casting. [#43193](https://github.com/ClickHouse/ClickHouse/pull/43193) ([jh0x](https://github.com/jh0x)). + ### ClickHouse release 22.10, 2022-10-26 #### Backward Incompatible Change @@ -570,7 +673,7 @@ * Support SQL standard CREATE INDEX and DROP INDEX syntax. [#35166](https://github.com/ClickHouse/ClickHouse/pull/35166) ([Jianmei Zhang](https://github.com/zhangjmruc)). * Send profile events for INSERT queries (previously only SELECT was supported). [#37391](https://github.com/ClickHouse/ClickHouse/pull/37391) ([Azat Khuzhin](https://github.com/azat)). * Implement in order aggregation (`optimize_aggregation_in_order`) for fully materialized projections. [#37469](https://github.com/ClickHouse/ClickHouse/pull/37469) ([Azat Khuzhin](https://github.com/azat)). -* Remove subprocess run for kerberos initialization. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)). +* Remove subprocess run for Kerberos initialization. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)). * * Add setting `multiple_joins_try_to_keep_original_names` to not rewrite identifier name on multiple JOINs rewrite, close [#34697](https://github.com/ClickHouse/ClickHouse/issues/34697). [#38149](https://github.com/ClickHouse/ClickHouse/pull/38149) ([Vladimir C](https://github.com/vdimir)). * Improved trace-visualizer UX. [#38169](https://github.com/ClickHouse/ClickHouse/pull/38169) ([Sergei Trifonov](https://github.com/serxa)). * Enable stack trace collection and query profiler for AArch64. [#38181](https://github.com/ClickHouse/ClickHouse/pull/38181) ([Maksim Kita](https://github.com/kitaisreal)). @@ -850,8 +953,8 @@ #### Upgrade Notes -* Now, background merges, mutations and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values, and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes of metrics, so we put in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
-* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL. +* Now, background merges, mutations, and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes to the metrics, so we put in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant in this area. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL. * `max_memory_usage` setting is removed from the default user profile in `users.xml`. This enables flexible memory limits for queries instead of the old rigid limit of 10 GB. * Disable `log_query_threads` setting by default. It controls the logging of statistics about every thread participating in query execution. After supporting asynchronous reads, the total number of distinct thread ids became too large, and logging into the `query_thread_log` has become too heavy. [#37077](https://github.com/ClickHouse/ClickHouse/pull/37077) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Remove function `groupArraySorted` which has a bug. [#36822](https://github.com/ClickHouse/ClickHouse/pull/36822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). From aa855d86a12c048359d68fb88ea9c55b5c5168a4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 17 Nov 2022 05:51:20 +0300 Subject: [PATCH 76/80] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 051bde44dd2..9fc3a589374 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ * `JSONExtract` family of functions will now attempt to coerce to the requested type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)). #### New Feature -* Adds support for retries during INSERTs into ReplicatedMergeTree when a session with ClickHouse Keeper is lost. Apart from fault tolerance, it aims to provide better user experience, - avoid returning a user an error during insert if keeper is restarted (for example, due to upgrade). [#42607](https://github.com/ClickHouse/ClickHouse/pull/42607) ([Igor Nikonov](https://github.com/devcrafter)). +* Adds support for retries during INSERTs into ReplicatedMergeTree when a session with ClickHouse Keeper is lost. Apart from fault tolerance, it aims to provide better user experience, - avoid returning a user an error during insert if keeper is restarted (for example, due to upgrade). This is controlled by the `insert_keeper_max_retries` setting, which is disabled by default. [#42607](https://github.com/ClickHouse/ClickHouse/pull/42607) ([Igor Nikonov](https://github.com/devcrafter)). 
* Add `Hudi` and `DeltaLake` table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do), [Kseniia Sumarokova](https://github.com/kssenii)). * Add table function `hudi` and `deltaLake`. [#43080](https://github.com/ClickHouse/ClickHouse/pull/43080) ([flynn](https://github.com/ucasfl)). * Support for composite time intervals. 1. Add, subtract and negate operations are now available on Intervals. In the case where the types of Intervals are different, they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)). From 10d7ce98154e3532f36072f331dd90973571f1a5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 17 Nov 2022 05:41:03 +0100 Subject: [PATCH 77/80] pmod: compatibility with Spark, better documentation --- CHANGELOG.md | 2 +- src/Functions/modulo.cpp | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 051bde44dd2..034ba26897e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ * Added applied row-level policies to `system.query_log`. [#39819](https://github.com/ClickHouse/ClickHouse/pull/39819) ([Vladimir Chebotaryov](https://github.com/quickhouse)). * Add four-letter command `csnp` for manually creating snapshots in ClickHouse Keeper. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)). * Add function `ascii` like in Apache Spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)). -* Add function `pmod` which returns non-negative result based on modulo. [#42755](https://github.com/ClickHouse/ClickHouse/pull/42755) ([李扬](https://github.com/taiyang-li)). +* Add function `positive_modulo` (`pmod`) which returns non-negative result based on modulo. [#42755](https://github.com/ClickHouse/ClickHouse/pull/42755) ([李扬](https://github.com/taiyang-li)). * Add function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)). * Add function `randCanonical`, which is similar to the `rand` function in Apache Spark or Impala. The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). [#43124](https://github.com/ClickHouse/ClickHouse/pull/43124) ([李扬](https://github.com/taiyang-li)). * Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)). 
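Note (illustrative, not part of the patch): the `modulo.cpp` change below renames the function to `positiveModulo` and registers `pmod` and `positive_modulo` as case-insensitive aliases. A small query sketch of the expected behaviour; the results in the comment are expectations, not output captured from this patch.

-- modulo keeps the sign of the dividend, positiveModulo is always non-negative.
SELECT modulo(-1, 10), positiveModulo(-1, 10), pmod(-1, 10);
-- expected: -1, 9, 9
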
diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index 9cd104cd1dc..be052b25af4 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -182,7 +182,7 @@ REGISTER_FUNCTION(ModuloLegacy) struct NamePositiveModulo { - static constexpr auto name = "positive_modulo"; + static constexpr auto name = "positiveModulo"; }; using FunctionPositiveModulo = BinaryArithmeticOverloadResolver; @@ -191,11 +191,17 @@ REGISTER_FUNCTION(PositiveModulo) factory.registerFunction( { R"( -Calculates the remainder when dividing `a` by `b`. Similar to function `modulo` except that `positive_modulo` always return non-negative number. +Calculates the remainder when dividing `a` by `b`. Similar to function `modulo` except that `positiveModulo` always return non-negative number. +Returns the difference between `a` and the nearest integer not greater than `a` divisible by `b`. +In other words, the function returning the modulus (modulo) in the terms of Modular Arithmetic. )", - Documentation::Examples{{"positive_modulo", "SELECT positive_modulo(-1000, 32);"}}, + Documentation::Examples{{"positiveModulo", "SELECT positiveModulo(-1, 10);"}}, Documentation::Categories{"Arithmetic"}}, FunctionFactory::CaseInsensitive); + + factory.registerAlias("positive_modulo", "positiveModulo", FunctionFactory::CaseInsensitive); + /// Compatibility with Spark: + factory.registerAlias("pmod", "positiveModulo", FunctionFactory::CaseInsensitive); } } From 41b557f926cadbed6e62020064ae6d419d33d001 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 17 Nov 2022 09:16:47 +0000 Subject: [PATCH 78/80] Disable DeltaLake and hudi table functions in readonly mode --- .../{TableFunctionDelta.cpp => TableFunctionDeltaLake.cpp} | 6 +++--- .../{TableFunctionDelta.h => TableFunctionDeltaLake.h} | 0 src/TableFunctions/TableFunctionHudi.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) rename src/TableFunctions/{TableFunctionDelta.cpp => TableFunctionDeltaLake.cpp} (96%) rename src/TableFunctions/{TableFunctionDelta.h => TableFunctionDeltaLake.h} (100%) diff --git a/src/TableFunctions/TableFunctionDelta.cpp b/src/TableFunctions/TableFunctionDeltaLake.cpp similarity index 96% rename from src/TableFunctions/TableFunctionDelta.cpp rename to src/TableFunctions/TableFunctionDeltaLake.cpp index 25ea2aaa77f..0f5801d57ac 100644 --- a/src/TableFunctions/TableFunctionDelta.cpp +++ b/src/TableFunctions/TableFunctionDeltaLake.cpp @@ -13,7 +13,7 @@ # include # include # include -# include +# include # include # include "registerTableFunctions.h" @@ -160,9 +160,9 @@ void registerTableFunctionDelta(TableFunctionFactory & factory) factory.registerFunction( {.documentation = {R"(The table function can be used to read the DeltaLake table stored on object store.)", - Documentation::Examples{{"hudi", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)"}}, + Documentation::Examples{{"deltaLake", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)"}}, Documentation::Categories{"DataLake"}}, - .allow_readonly = true}); + .allow_readonly = false}); } } diff --git a/src/TableFunctions/TableFunctionDelta.h b/src/TableFunctions/TableFunctionDeltaLake.h similarity index 100% rename from src/TableFunctions/TableFunctionDelta.h rename to src/TableFunctions/TableFunctionDeltaLake.h diff --git a/src/TableFunctions/TableFunctionHudi.cpp b/src/TableFunctions/TableFunctionHudi.cpp index b1db90da550..2e27d192b58 100644 --- a/src/TableFunctions/TableFunctionHudi.cpp +++ b/src/TableFunctions/TableFunctionHudi.cpp @@ -162,7 
+162,7 @@ void registerTableFunctionHudi(TableFunctionFactory & factory) = {R"(The table function can be used to read the Hudi table stored on object store.)", Documentation::Examples{{"hudi", "SELECT * FROM hudi(url, access_key_id, secret_access_key)"}}, Documentation::Categories{"DataLake"}}, - .allow_readonly = true}); + .allow_readonly = false}); } } From 11b535d443f12504c79a48203e86d029005f04e6 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 17 Nov 2022 11:17:27 +0100 Subject: [PATCH 79/80] impl (#43283) --- docs/en/sql-reference/functions/random-functions.md | 2 +- src/Functions/canonicalRand.cpp | 4 ++-- tests/queries/0_stateless/01047_nullable_rand.sql | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 08f2620a009..4efa2131eb6 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -24,7 +24,7 @@ Returns a pseudo-random UInt64 number, evenly distributed among all UInt64-type Uses a linear congruential generator. -## canonicalRand +## randCanonical The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). Non-deterministic. Return type is Float64. diff --git a/src/Functions/canonicalRand.cpp b/src/Functions/canonicalRand.cpp index d0b8c655e14..0f168142177 100644 --- a/src/Functions/canonicalRand.cpp +++ b/src/Functions/canonicalRand.cpp @@ -34,7 +34,7 @@ private: struct NameCanonicalRand { - static constexpr auto name = "canonicalRand"; + static constexpr auto name = "randCanonical"; }; class FunctionCanonicalRand : public FunctionRandomImpl @@ -52,7 +52,7 @@ REGISTER_FUNCTION(CanonicalRand) The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). Non-deterministic. Return type is Float64. 
)", - Documentation::Examples{{"canonicalRand", "SELECT canonicalRand()"}}, + Documentation::Examples{{"randCanonical", "SELECT randCanonical()"}}, Documentation::Categories{"Mathematical"}}); } diff --git a/tests/queries/0_stateless/01047_nullable_rand.sql b/tests/queries/0_stateless/01047_nullable_rand.sql index 9d3c361c543..e5633637db6 100644 --- a/tests/queries/0_stateless/01047_nullable_rand.sql +++ b/tests/queries/0_stateless/01047_nullable_rand.sql @@ -1,13 +1,13 @@ select toTypeName(rand(cast(4 as Nullable(UInt8)))); -select toTypeName(canonicalRand(CAST(4 as Nullable(UInt8)))); +select toTypeName(randCanonical(CAST(4 as Nullable(UInt8)))); select toTypeName(randConstant(CAST(4 as Nullable(UInt8)))); select toTypeName(rand(Null)); -select toTypeName(canonicalRand(Null)); +select toTypeName(randCanonical(Null)); select toTypeName(randConstant(Null)); select rand(cast(4 as Nullable(UInt8))) * 0; -select canonicalRand(cast(4 as Nullable(UInt8))) * 0; +select randCanonical(cast(4 as Nullable(UInt8))) * 0; select randConstant(CAST(4 as Nullable(UInt8))) * 0; select rand(Null) * 0; -select canonicalRand(Null) * 0; +select randCanonical(Null) * 0; select randConstant(Null) * 0; From 7beb58b0cf2cd04fdf7548ce0574bbb1ec6280a5 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 17 Nov 2022 13:19:02 +0100 Subject: [PATCH 80/80] Optimize merge of uniqExact without_key (#43072) * impl for uniqExact * rm unused (read|write)Text methods * fix style * small fixes * impl for variadic uniqExact * refactor * fix style * more agressive inlining * disable if max_threads=1 * small improvements * review fixes * Revert "rm unused (read|write)Text methods" This reverts commit a7e74805842572f6fa2c28ea111ab8ca7c19ad21. * encapsulate is_able_to_parallelize_merge in Data * encapsulate is_exact & argument_is_tuple in Data --- .../AggregateFunctionUniq.cpp | 56 ++-- .../AggregateFunctionUniq.h | 307 +++++++++++++++--- src/AggregateFunctions/Helpers.h | 13 + src/AggregateFunctions/IAggregateFunction.h | 15 +- src/AggregateFunctions/UniqExactSet.h | 112 +++++++ src/AggregateFunctions/UniquesHashSet.h | 2 +- src/Common/HashTable/HashSet.h | 57 ++++ src/Common/HashTable/HashTable.h | 20 +- src/Common/HashTable/TwoLevelHashTable.h | 8 +- src/Common/examples/small_table.cpp | 2 +- src/Common/tests/gtest_hash_table.cpp | 56 ++++ src/Interpreters/Aggregator.cpp | 12 +- .../test_aggregate_function_state.py | 228 +++++++++++++ .../test_aggregate_function_state_avg.py | 82 ----- tests/performance/uniq_without_key.xml | 33 ++ 15 files changed, 832 insertions(+), 171 deletions(-) create mode 100644 src/AggregateFunctions/UniqExactSet.h create mode 100644 tests/integration/test_backward_compatibility/test_aggregate_function_state.py delete mode 100644 tests/integration/test_backward_compatibility/test_aggregate_function_state_avg.py create mode 100644 tests/performance/uniq_without_key.xml diff --git a/src/AggregateFunctions/AggregateFunctionUniq.cpp b/src/AggregateFunctions/AggregateFunctionUniq.cpp index 0d1c831c839..1c90767131c 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniq.cpp @@ -9,6 +9,7 @@ #include #include +#include namespace DB { @@ -28,8 +29,9 @@ namespace /** `DataForVariadic` is a data structure that will be used for `uniq` aggregate function of multiple arguments. * It differs, for example, in that it uses a trivial hash function, since `uniq` of many arguments first hashes them out itself. 
*/ -template -AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *) +template typename DataForVariadic> +AggregateFunctionPtr +createAggregateFunctionUniq(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *) { assertNoParameters(name, params); @@ -61,21 +63,22 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const else if (which.isTuple()) { if (use_exact_hash_function) - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); else - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); } } /// "Variadic" method also works as a fallback generic case for single argument. if (use_exact_hash_function) - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); else - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); } -template class Data, typename DataForVariadic> -AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *) +template typename Data, template typename DataForVariadic, bool is_able_to_parallelize_merge> +AggregateFunctionPtr +createAggregateFunctionUniq(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *) { assertNoParameters(name, params); @@ -91,35 +94,35 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const { const IDataType & argument_type = *argument_types[0]; - AggregateFunctionPtr res(createWithNumericType(*argument_types[0], argument_types)); + AggregateFunctionPtr res(createWithNumericType(*argument_types[0], argument_types)); WhichDataType which(argument_type); if (res) return res; else if (which.isDate()) - return std::make_shared>>(argument_types); + return std::make_shared>>(argument_types); else if (which.isDate32()) - return std::make_shared>>(argument_types); + return std::make_shared>>(argument_types); else if (which.isDateTime()) - return std::make_shared>>(argument_types); + return std::make_shared>>(argument_types); else if (which.isStringOrFixedString()) - return std::make_shared>>(argument_types); + return std::make_shared>>(argument_types); else if (which.isUUID()) - return std::make_shared>>(argument_types); + return std::make_shared>>(argument_types); else if (which.isTuple()) { if (use_exact_hash_function) - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); else - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); } } /// "Variadic" method also works as a fallback generic case for single argument. 
if (use_exact_hash_function) - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); else - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); } } @@ -132,14 +135,23 @@ void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory) {createAggregateFunctionUniq, properties}); factory.registerFunction("uniqHLL12", - {createAggregateFunctionUniq, properties}); + {createAggregateFunctionUniq, properties}); - factory.registerFunction("uniqExact", - {createAggregateFunctionUniq>, properties}); + auto assign_bool_param = [](const std::string & name, const DataTypes & argument_types, const Array & params, const Settings * settings) + { + /// Using two level hash set if we wouldn't be able to merge in parallel can cause ~10% slowdown. + if (settings && settings->max_threads > 1) + return createAggregateFunctionUniq< + true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactDataForVariadic, true /* is_able_to_parallelize_merge */>(name, argument_types, params, settings); + else + return createAggregateFunctionUniq< + true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactDataForVariadic, false /* is_able_to_parallelize_merge */>(name, argument_types, params, settings); + }; + factory.registerFunction("uniqExact", {assign_bool_param, properties}); #if USE_DATASKETCHES factory.registerFunction("uniqTheta", - {createAggregateFunctionUniq, properties}); + {createAggregateFunctionUniq, properties}); #endif } diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index fe2530800cc..1a98bfc8456 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -1,7 +1,10 @@ #pragma once -#include +#include +#include #include +#include +#include #include @@ -13,17 +16,18 @@ #include +#include #include #include #include -#include -#include #include +#include -#include #include #include +#include #include +#include namespace DB @@ -37,94 +41,128 @@ struct AggregateFunctionUniqUniquesHashSetData using Set = UniquesHashSet>; Set set; + constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_variadic = false; + static String getName() { return "uniq"; } }; /// For a function that takes multiple arguments. Such a function pre-hashes them in advance, so TrivialHash is used here. 
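// ---------------------------------------------------------------------------
// [Editor's note - illustrative sketch, not part of the patch.] The uniqExact
// registration above only instantiates the parallel-merge variant when
// max_threads > 1, because (per the lambda's comment) keeping a two-level set
// costs roughly 10% when the merge ends up single-threaded anyway. A minimal
// stand-in of that "pick the template instantiation at registration time"
// pattern; ToySettings, IToyFunction and ToyUniqExact are hypothetical names.
#include <memory>

struct ToySettings { unsigned max_threads = 1; };

struct IToyFunction { virtual ~IToyFunction() = default; };

template <bool is_able_to_parallelize_merge>
struct ToyUniqExact : IToyFunction {};

std::unique_ptr<IToyFunction> createToyUniqExact(const ToySettings * settings)
{
    /// The decision is made once, when the aggregate function object is
    /// created, so the per-row and per-merge code paths never re-check the setting.
    if (settings && settings->max_threads > 1)
        return std::make_unique<ToyUniqExact<true>>();
    return std::make_unique<ToyUniqExact<false>>();
}
// ---------------------------------------------------------------------------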
+template struct AggregateFunctionUniqUniquesHashSetDataForVariadic { using Set = UniquesHashSet; Set set; + constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_variadic = true; + constexpr static bool is_exact = is_exact_; + constexpr static bool argument_is_tuple = argument_is_tuple_; + static String getName() { return "uniq"; } }; /// uniqHLL12 -template +template struct AggregateFunctionUniqHLL12Data { using Set = HyperLogLogWithSmallSetOptimization; Set set; - static String getName() { return "uniqHLL12"; } -}; - -template <> -struct AggregateFunctionUniqHLL12Data -{ - using Set = HyperLogLogWithSmallSetOptimization; - Set set; + constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_variadic = false; static String getName() { return "uniqHLL12"; } }; template <> -struct AggregateFunctionUniqHLL12Data +struct AggregateFunctionUniqHLL12Data { using Set = HyperLogLogWithSmallSetOptimization; Set set; + constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_variadic = false; + static String getName() { return "uniqHLL12"; } }; +template <> +struct AggregateFunctionUniqHLL12Data +{ + using Set = HyperLogLogWithSmallSetOptimization; + Set set; + + constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_variadic = false; + + static String getName() { return "uniqHLL12"; } +}; + +template struct AggregateFunctionUniqHLL12DataForVariadic { using Set = HyperLogLogWithSmallSetOptimization; Set set; + constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_variadic = true; + constexpr static bool is_exact = is_exact_; + constexpr static bool argument_is_tuple = argument_is_tuple_; + static String getName() { return "uniqHLL12"; } }; /// uniqExact -template +template struct AggregateFunctionUniqExactData { using Key = T; /// When creating, the hash table must be small. - using Set = HashSet< - Key, - HashCRC32, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory>; + using SingleLevelSet = HashSet, HashTableGrower<4>, HashTableAllocatorWithStackMemory>; + using TwoLevelSet = TwoLevelHashSet>; + using Set = UniqExactSet; Set set; + constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_variadic = false; + static String getName() { return "uniqExact"; } }; /// For rows, we put the SipHash values (128 bits) into the hash table. -template <> -struct AggregateFunctionUniqExactData +template +struct AggregateFunctionUniqExactData { using Key = UInt128; /// When creating, the hash table must be small. 
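// ---------------------------------------------------------------------------
// [Editor's note - illustrative sketch, not part of the patch.] The
// AggregateFunctionUniqExactData variants above now hold a UniqExactSet built
// from a SingleLevelSet and a TwoLevelSet: a state starts in the compact
// single-level form and is converted to the bucketed form only once it grows
// (the patch converts above 100'000 keys, in Adder::add further down), so small
// states do not pay for the parallel-merge capability. The toy wrapper below
// only mimics that shape; ToyUniqExactSet and its internals are hypothetical.
#include <cstdint>
#include <unordered_set>
#include <variant>
#include <vector>

class ToyUniqExactSet
{
    using SingleLevel = std::unordered_set<uint64_t>;
    using TwoLevel = std::vector<std::unordered_set<uint64_t>>;   /// one sub-set per bucket

    std::variant<SingleLevel, TwoLevel> impl = SingleLevel{};

public:
    bool isSingleLevel() const { return std::holds_alternative<SingleLevel>(impl); }

    void convertToTwoLevel()
    {
        TwoLevel two_level(256);
        for (uint64_t key : std::get<SingleLevel>(impl))
            two_level[key % 256].insert(key);
        impl = std::move(two_level);
    }

    void insert(uint64_t key)
    {
        if (auto * single = std::get_if<SingleLevel>(&impl))
        {
            single->insert(key);
            if (single->size() > 100'000)   /// same threshold as the patch uses
                convertToTwoLevel();
        }
        else
            std::get<TwoLevel>(impl)[key % 256].insert(key);
    }
};
// ---------------------------------------------------------------------------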
- using Set = HashSet< - Key, - UInt128TrivialHash, - HashTableGrower<3>, - HashTableAllocatorWithStackMemory>; + using SingleLevelSet = HashSet, HashTableAllocatorWithStackMemory>; + using TwoLevelSet = TwoLevelHashSet; + using Set = UniqExactSet; Set set; + constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_variadic = false; + static String getName() { return "uniqExact"; } }; +template +struct AggregateFunctionUniqExactDataForVariadic : AggregateFunctionUniqExactData +{ + constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_variadic = true; + constexpr static bool is_exact = is_exact_; + constexpr static bool argument_is_tuple = argument_is_tuple_; +}; /// uniqTheta #if USE_DATASKETCHES @@ -134,14 +172,37 @@ struct AggregateFunctionUniqThetaData using Set = ThetaSketchData; Set set; + constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_variadic = false; + static String getName() { return "uniqTheta"; } }; +template +struct AggregateFunctionUniqThetaDataForVariadic : AggregateFunctionUniqThetaData +{ + constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_variadic = true; + constexpr static bool is_exact = is_exact_; + constexpr static bool argument_is_tuple = argument_is_tuple_; +}; + #endif namespace detail { +template +struct IsUniqExactSet : std::false_type +{ +}; + +template +struct IsUniqExactSet> : std::true_type +{ +}; + + /** Hash function for uniq. */ template struct AggregateFunctionUniqTraits @@ -162,17 +223,31 @@ template struct AggregateFunctionUniqTraits }; -/** The structure for the delegation work to add one element to the `uniq` aggregate functions. +/** The structure for the delegation work to add elements to the `uniq` aggregate functions. * Used for partial specialization to add strings. */ template -struct OneAdder +struct Adder { - static void ALWAYS_INLINE add(Data & data, const IColumn & column, size_t row_num) + /// We have to introduce this template parameter (and a bunch of ugly code dealing with it), because we cannot + /// add runtime branches in whatever_hash_set::insert - it will immediately pop up in the perf top. 
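// ---------------------------------------------------------------------------
// [Editor's note - illustrative sketch, not part of the patch.] The comment
// above explains why Adder::add takes the current representation as a template
// parameter: checking "single-level or two-level?" inside the per-row insert
// would show up in the hot loop. Instead the check is made once per batch and
// each instantiation gets a branch-free loop. A minimal stand-in of that trick;
// the container aliases and insertBatch are hypothetical.
#include <cstddef>
#include <cstdint>
#include <unordered_set>
#include <vector>

using ToySingleLevel = std::unordered_set<uint64_t>;
using ToyTwoLevel = std::vector<std::unordered_set<uint64_t>>;   /// 256 buckets

template <bool use_single_level_hash_table>
void insertBatch(const uint64_t * keys, size_t n, ToySingleLevel & single, ToyTwoLevel & two_level)
{
    for (size_t i = 0; i < n; ++i)
    {
        /// Resolved at compile time, so the loop body contains only one path.
        if constexpr (use_single_level_hash_table)
            single.insert(keys[i]);
        else
            two_level[keys[i] % two_level.size()].insert(keys[i]);
    }
}

/// Caller decides once per batch, mirroring the add() overloads further down:
///   if (set.isSingleLevel()) insertBatch<true>(...); else insertBatch<false>(...);
// ---------------------------------------------------------------------------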
+ template + static void ALWAYS_INLINE add(Data & data, const IColumn ** columns, size_t num_args, size_t row_num) { - if constexpr (std::is_same_v - || std::is_same_v>) + if constexpr (Data::is_variadic) { + if constexpr (IsUniqExactSet::value) + data.set.template insert( + UniqVariadicHash::apply(num_args, columns, row_num)); + else + data.set.insert(T{UniqVariadicHash::apply(num_args, columns, row_num)}); + } + else if constexpr ( + std::is_same_v< + Data, + AggregateFunctionUniqUniquesHashSetData> || std::is_same_v>) + { + const auto & column = *columns[0]; if constexpr (!std::is_same_v) { using ValueType = typename decltype(data.set)::value_type; @@ -185,11 +260,13 @@ struct OneAdder data.set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size)); } } - else if constexpr (std::is_same_v>) + else if constexpr (std::is_same_v>) { + const auto & column = *columns[0]; if constexpr (!std::is_same_v) { - data.set.insert(assert_cast &>(column).getData()[row_num]); + data.set.template insert( + assert_cast &>(column).getData()[row_num]); } else { @@ -200,16 +277,72 @@ struct OneAdder hash.update(value.data, value.size); hash.get128(key); - data.set.insert(key); + data.set.template insert(key); } } #if USE_DATASKETCHES else if constexpr (std::is_same_v) { + const auto & column = *columns[0]; data.set.insertOriginal(column.getDataAt(row_num)); } #endif } + + static void ALWAYS_INLINE + add(Data & data, const IColumn ** columns, size_t num_args, size_t row_begin, size_t row_end, const char8_t * flags, const UInt8 * null_map) + { + bool use_single_level_hash_table = true; + if constexpr (Data::is_able_to_parallelize_merge) + use_single_level_hash_table = data.set.isSingleLevel(); + + if (use_single_level_hash_table) + addImpl(data, columns, num_args, row_begin, row_end, flags, null_map); + else + addImpl(data, columns, num_args, row_begin, row_end, flags, null_map); + + if constexpr (Data::is_able_to_parallelize_merge) + { + if (data.set.isSingleLevel() && data.set.size() > 100'000) + data.set.convertToTwoLevel(); + } + } + +private: + template + static void ALWAYS_INLINE + addImpl(Data & data, const IColumn ** columns, size_t num_args, size_t row_begin, size_t row_end, const char8_t * flags, const UInt8 * null_map) + { + if (!flags) + { + if (!null_map) + { + for (size_t row = row_begin; row < row_end; ++row) + add(data, columns, num_args, row); + } + else + { + for (size_t row = row_begin; row < row_end; ++row) + if (!null_map[row]) + add(data, columns, num_args, row); + } + } + else + { + if (!null_map) + { + for (size_t row = row_begin; row < row_end; ++row) + if (flags[row]) + add(data, columns, num_args, row); + } + else + { + for (size_t row = row_begin; row < row_end; ++row) + if (!null_map[row] && flags[row]) + add(data, columns, num_args, row); + } + } + } }; } @@ -219,9 +352,15 @@ struct OneAdder template class AggregateFunctionUniq final : public IAggregateFunctionDataHelper> { +private: + static constexpr size_t num_args = 1; + static constexpr bool is_able_to_parallelize_merge = Data::is_able_to_parallelize_merge; + public: - AggregateFunctionUniq(const DataTypes & argument_types_) - : IAggregateFunctionDataHelper>(argument_types_, {}) {} + explicit AggregateFunctionUniq(const DataTypes & argument_types_) + : IAggregateFunctionDataHelper>(argument_types_, {}) + { + } String getName() const override { return Data::getName(); } @@ -235,7 +374,18 @@ public: /// ALWAYS_INLINE is required to have better code layout for uniqHLL12 function void ALWAYS_INLINE add(AggregateDataPtr 
__restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { - detail::OneAdder::add(this->data(place), *columns[0], row_num); + detail::Adder::add(this->data(place), columns, num_args, row_num); + } + + void ALWAYS_INLINE addBatchSinglePlace( + size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena *, ssize_t if_argument_pos) + const override + { + const char8_t * flags = nullptr; + if (if_argument_pos >= 0) + flags = assert_cast(*columns[if_argument_pos]).getData().data(); + + detail::Adder::add(this->data(place), columns, num_args, row_begin, row_end, flags, nullptr /* null_map */); } void addManyDefaults( @@ -244,7 +394,23 @@ public: size_t /*length*/, Arena * /*arena*/) const override { - detail::OneAdder::add(this->data(place), *columns[0], 0); + detail::Adder::add(this->data(place), columns, num_args, 0); + } + + void addBatchSinglePlaceNotNull( + size_t row_begin, + size_t row_end, + AggregateDataPtr __restrict place, + const IColumn ** columns, + const UInt8 * null_map, + Arena *, + ssize_t if_argument_pos) const override + { + const char8_t * flags = nullptr; + if (if_argument_pos >= 0) + flags = assert_cast(*columns[if_argument_pos]).getData().data(); + + detail::Adder::add(this->data(place), columns, num_args, row_begin, row_end, flags, null_map); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override @@ -252,6 +418,16 @@ public: this->data(place).set.merge(this->data(rhs).set); } + bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override + { + if constexpr (is_able_to_parallelize_merge) + this->data(place).set.merge(this->data(rhs).set, &thread_pool); + else + this->data(place).set.merge(this->data(rhs).set); + } + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { this->data(place).set.write(buf); @@ -273,15 +449,20 @@ public: * You can pass multiple arguments as is; You can also pass one argument - a tuple. * But (for the possibility of efficient implementation), you can not pass several arguments, among which there are tuples. 
*/ -template -class AggregateFunctionUniqVariadic final : public IAggregateFunctionDataHelper> +template +class AggregateFunctionUniqVariadic final : public IAggregateFunctionDataHelper> { private: + using T = typename Data::Set::value_type; + + static constexpr size_t is_able_to_parallelize_merge = Data::is_able_to_parallelize_merge; + static constexpr size_t argument_is_tuple = Data::argument_is_tuple; + size_t num_args = 0; public: - AggregateFunctionUniqVariadic(const DataTypes & arguments) - : IAggregateFunctionDataHelper>(arguments, {}) + explicit AggregateFunctionUniqVariadic(const DataTypes & arguments) + : IAggregateFunctionDataHelper>(arguments, {}) { if (argument_is_tuple) num_args = typeid_cast(*arguments[0]).getElements().size(); @@ -300,8 +481,34 @@ public: void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { - this->data(place).set.insert(typename Data::Set::value_type( - UniqVariadicHash::apply(num_args, columns, row_num))); + detail::Adder::add(this->data(place), columns, num_args, row_num); + } + + void addBatchSinglePlace( + size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena *, ssize_t if_argument_pos) + const override + { + const char8_t * flags = nullptr; + if (if_argument_pos >= 0) + flags = assert_cast(*columns[if_argument_pos]).getData().data(); + + detail::Adder::add(this->data(place), columns, num_args, row_begin, row_end, flags, nullptr /* null_map */); + } + + void addBatchSinglePlaceNotNull( + size_t row_begin, + size_t row_end, + AggregateDataPtr __restrict place, + const IColumn ** columns, + const UInt8 * null_map, + Arena *, + ssize_t if_argument_pos) const override + { + const char8_t * flags = nullptr; + if (if_argument_pos >= 0) + flags = assert_cast(*columns[if_argument_pos]).getData().data(); + + detail::Adder::add(this->data(place), columns, num_args, row_begin, row_end, flags, null_map); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override @@ -309,6 +516,16 @@ public: this->data(place).set.merge(this->data(rhs).set); } + bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override + { + if constexpr (is_able_to_parallelize_merge) + this->data(place).set.merge(this->data(rhs).set, &thread_pool); + else + this->data(place).set.merge(this->data(rhs).set); + } + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { this->data(place).set.write(buf); diff --git a/src/AggregateFunctions/Helpers.h b/src/AggregateFunctions/Helpers.h index 6e140f4b9cf..c97733571a3 100644 --- a/src/AggregateFunctions/Helpers.h +++ b/src/AggregateFunctions/Helpers.h @@ -74,6 +74,19 @@ static IAggregateFunction * createWithNumericType(const IDataType & argument_typ return nullptr; } +template