From 3b5c41fc6d83949026c59067232c9f45e03ea32f Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Tue, 6 Jul 2021 14:04:49 +0100 Subject: [PATCH 01/29] Test CTE with aggregation over distributed table --- .../test_cte_distributed.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 tests/integration/test_backward_compatibility/test_cte_distributed.py diff --git a/tests/integration/test_backward_compatibility/test_cte_distributed.py b/tests/integration/test_backward_compatibility/test_cte_distributed.py new file mode 100644 index 00000000000..64090b892b8 --- /dev/null +++ b/tests/integration/test_backward_compatibility/test_cte_distributed.py @@ -0,0 +1,54 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__, name="cte_distributed") +node1 = cluster.add_instance('node1', with_zookeeper=False) +node2 = cluster.add_instance('node2', + with_zookeeper=False, image='yandex/clickhouse-server', tag='19.16.9.37', stay_alive=True, + with_installed_binary=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + + +def test_cte_distributed(start_cluster): + node2.query(""" +WITH + quantile(0.05)(cnt) as p05, + quantile(0.95)(cnt) as p95, + p95 - p05 as inter_percentile_range +SELECT + sum(cnt) as total_requests, + count() as data_points, + inter_percentile_range +FROM ( + SELECT + count() as cnt + FROM remote('node{1,2}', numbers(10)) + GROUP BY number +)""") + + node1.query(""" +WITH + quantile(0.05)(cnt) as p05, + quantile(0.95)(cnt) as p95, + p95 - p05 as inter_percentile_range +SELECT + sum(cnt) as total_requests, + count() as data_points, + inter_percentile_range +FROM ( + SELECT + count() as cnt + FROM remote('node{1,2}', numbers(10)) + GROUP BY number +)""") From 16532658c2615379e00de8aea786bf93a28896ab Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Fri, 9 Jul 2021 19:18:22 +0300 Subject: [PATCH 
02/29] Avro string for ClickHouse string --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 2 +- .../Formats/Impl/AvroRowOutputFormat.cpp | 65 +++++++++++++++---- .../Formats/Impl/AvroRowOutputFormat.h | 11 ++-- 5 files changed, 62 insertions(+), 18 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8c733415dec..ac9585c6783 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -524,6 +524,7 @@ class IColumn; M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \ M(URI, format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \ + M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ \ M(Bool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \ \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index a00839fc5f5..7cd034aab23 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -60,6 +60,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.avro.output_codec = settings.output_format_avro_codec; format_settings.avro.output_sync_interval = settings.output_format_avro_sync_interval; format_settings.avro.schema_registry_url = settings.format_avro_schema_registry_url.toString(); + format_settings.avro.string_column_pattern = settings.output_format_avro_string_column_pattern.toString(); format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes; 
format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes; format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 1773f2cc2c6..57dfb9023cc 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -61,6 +61,7 @@ struct FormatSettings String output_codec; UInt64 output_sync_interval = 16 * 1024; bool allow_missing_fields = false; + String string_column_pattern; } avro; struct CSV @@ -169,4 +170,3 @@ struct FormatSettings }; } - diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 81922bdde80..e20ad539979 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -41,6 +41,7 @@ #include #include +#include namespace DB { @@ -48,8 +49,36 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int BAD_ARGUMENTS; + extern const int CANNOT_COMPILE_REGEXP; } +class AvroSerializerTraits +{ +public: + bool isStringAsString(const String & column_name) + { + return RE2::PartialMatch/*FullMatch*/(column_name, string_to_string_regexp); + } + + AvroSerializerTraits(const FormatSettings & settings_) + : string_to_string_regexp(settings_.avro.string_column_pattern) + { + if (!string_to_string_regexp.ok()) + throw DB::Exception( + "Avro: cannot compile re2: " + settings_.avro.string_column_pattern + ", error: " + string_to_string_regexp.error() + + ". 
Look at https://github.com/google/re2/wiki/Syntax for reference.", + DB::ErrorCodes::CANNOT_COMPILE_REGEXP); + } + + ~AvroSerializerTraits() + { + } + +private: + const RE2 string_to_string_regexp; +}; + + class OutputStreamWriteBufferAdapter : public avro::OutputStream { public: @@ -75,7 +104,7 @@ private: }; -AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment) +AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment, const String & column_name) { ++type_name_increment; @@ -161,11 +190,20 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF }}; } case TypeIndex::String: - return {avro::BytesSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) - { - const StringRef & s = assert_cast(column).getDataAt(row_num); - encoder.encodeBytes(reinterpret_cast(s.data), s.size); - }}; + if (traits->isStringAsString(column_name)) + return {avro::StringSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const StringRef & s = assert_cast(column).getDataAt(row_num); + encoder.encodeString(s.toString()); + } + }; + else + return {avro::BytesSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const StringRef & s = assert_cast(column).getDataAt(row_num); + encoder.encodeBytes(reinterpret_cast(s.data), s.size); + } + }; case TypeIndex::FixedString: { auto size = data_type->getSizeOfValueInMemory(); @@ -223,7 +261,7 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF case TypeIndex::Array: { const auto & array_type = assert_cast(*data_type); - auto nested_mapping = createSchemaWithSerializeFn(array_type.getNestedType(), type_name_increment); + auto nested_mapping = createSchemaWithSerializeFn(array_type.getNestedType(), type_name_increment, column_name); auto schema = 
avro::ArraySchema(nested_mapping.schema); return {schema, [nested_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder) { @@ -249,7 +287,7 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF case TypeIndex::Nullable: { auto nested_type = removeNullable(data_type); - auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment); + auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment, column_name); if (nested_type->getTypeId() == TypeIndex::Nothing) { return nested_mapping; @@ -278,7 +316,7 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF case TypeIndex::LowCardinality: { const auto & nested_type = removeLowCardinality(data_type); - auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment); + auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment, column_name); return {nested_mapping.schema, [nested_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder) { const auto & col = assert_cast(column); @@ -294,7 +332,8 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF } -AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns) +AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns, std::unique_ptr traits_) + : traits(std::move(traits_)) { avro::RecordSchema record_schema("row"); @@ -303,7 +342,7 @@ AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns) { try { - auto field_mapping = createSchemaWithSerializeFn(column.type, type_name_increment); + auto field_mapping = createSchemaWithSerializeFn(column.type, type_name_increment, column.name); serialize_fns.push_back(field_mapping.serialize); //TODO: verify name starts with A-Za-z_ record_schema.addField(column.name, field_mapping.schema); @@ -314,7 +353,7 @@ AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns) throw; } } 
- schema.setSchema(record_schema); + valid_schema.setSchema(record_schema); } void AvroSerializer::serializeRow(const Columns & columns, size_t row_num, avro::Encoder & encoder) @@ -350,7 +389,7 @@ AvroRowOutputFormat::AvroRowOutputFormat( WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_) : IRowOutputFormat(header_, out_, params_) , settings(settings_) - , serializer(header_.getColumnsWithTypeAndName()) + , serializer(header_.getColumnsWithTypeAndName(), std::make_unique(settings)) , file_writer( std::make_unique(out_), serializer.getSchema(), diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.h b/src/Processors/Formats/Impl/AvroRowOutputFormat.h index 8d0581d3307..fa4cedf1cc2 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.h @@ -18,11 +18,13 @@ namespace DB { class WriteBuffer; +class AvroSerializerTraits; + class AvroSerializer { public: - AvroSerializer(const ColumnsWithTypeAndName & columns); - const avro::ValidSchema & getSchema() const { return schema; } + AvroSerializer(const ColumnsWithTypeAndName & columns, std::unique_ptr); + const avro::ValidSchema & getSchema() const { return valid_schema; } void serializeRow(const Columns & columns, size_t row_num, avro::Encoder & encoder); private: @@ -34,10 +36,11 @@ private: }; /// Type names for different complex types (e.g. enums, fixed strings) must be unique. We use simple incremental number to give them different names. 
- static SchemaWithSerializeFn createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment); + /*static*/ SchemaWithSerializeFn createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment, const String & column_name); std::vector serialize_fns; - avro::ValidSchema schema; + avro::ValidSchema valid_schema; + std::unique_ptr traits; }; class AvroRowOutputFormat : public IRowOutputFormat From 020cce63b0ccdae303d56794a318752d31b22ef3 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 12 Jul 2021 13:19:48 +0300 Subject: [PATCH 03/29] Avro string for ClickHouse string tests and doc --- docs/en/interfaces/formats.md | 11 ++++++----- docs/en/operations/settings/settings.md | 9 ++++++++- .../Formats/Impl/AvroRowOutputFormat.cpp | 18 +++++++++--------- .../queries/0_stateless/01060_avro.reference | 6 ++++++ tests/queries/0_stateless/01060_avro.sh | 19 +++++++++++++++++++ 5 files changed, 48 insertions(+), 15 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index eb288721231..1f21a0a3a3d 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1130,17 +1130,18 @@ The table below shows supported data types and how they match ClickHouse [data t | `boolean`, `int`, `long`, `float`, `double` | [Int64](../sql-reference/data-types/int-uint.md), [UInt64](../sql-reference/data-types/int-uint.md) | `long` | | `boolean`, `int`, `long`, `float`, `double` | [Float32](../sql-reference/data-types/float.md) | `float` | | `boolean`, `int`, `long`, `float`, `double` | [Float64](../sql-reference/data-types/float.md) | `double` | -| `bytes`, `string`, `fixed`, `enum` | [String](../sql-reference/data-types/string.md) | `bytes` | +| `bytes`, `string`, `fixed`, `enum` | [String](../sql-reference/data-types/string.md) | `bytes` or `string` \* | | `bytes`, `string`, `fixed` | [FixedString(N)](../sql-reference/data-types/fixedstring.md) | `fixed(N)` | | `enum` | 
[Enum(8\|16)](../sql-reference/data-types/enum.md) | `enum` | | `array(T)` | [Array(T)](../sql-reference/data-types/array.md) | `array(T)` | | `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql-reference/data-types/date.md) | `union(null, T)` | | `null` | [Nullable(Nothing)](../sql-reference/data-types/special-data-types/nothing.md) | `null` | -| `int (date)` \* | [Date](../sql-reference/data-types/date.md) | `int (date)` \* | -| `long (timestamp-millis)` \* | [DateTime64(3)](../sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* | -| `long (timestamp-micros)` \* | [DateTime64(6)](../sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* | +| `int (date)` \** | [Date](../sql-reference/data-types/date.md) | `int (date)` \** | +| `long (timestamp-millis)` \** | [DateTime64(3)](../sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \** | +| `long (timestamp-micros)` \** | [DateTime64(6)](../sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \** | -\* [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types) +\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](../operations/settings/settings.md#output_format_avro_string_column_pattern) +\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types) Unsupported Avro data types: `record` (non-root), `map` diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 9c1cc6eccfd..b5edcbe8ea9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1738,7 +1738,7 @@ Default value: 0. ## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns} -Enables or disables optimization by transforming some functions to reading subcolumns. This reduces the amount of data to read. +Enables or disables optimization by transforming some functions to reading subcolumns. 
This reduces the amount of data to read. These functions can be transformed: @@ -1969,6 +1969,13 @@ Possible values: 32 (32 bytes) - 1073741824 (1 GiB) Default value: 32768 (32 KiB) +## output_format_avro_string_column_pattern {#output_format_avro_string_column_pattern} + +Regexp of column names of type String to output as Avro `string` (default is `bytes`). +RE2 syntax is supported. + +Type: string + ## format_avro_schema_registry_url {#format_avro_schema_registry_url} Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format. diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index e20ad539979..96e9ed0ab34 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -57,7 +57,7 @@ class AvroSerializerTraits public: bool isStringAsString(const String & column_name) { - return RE2::PartialMatch/*FullMatch*/(column_name, string_to_string_regexp); + return RE2::FullMatch(column_name, string_to_string_regexp); } AvroSerializerTraits(const FormatSettings & settings_) @@ -192,17 +192,17 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF case TypeIndex::String: if (traits->isStringAsString(column_name)) return {avro::StringSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) - { - const StringRef & s = assert_cast(column).getDataAt(row_num); - encoder.encodeString(s.toString()); - } + { + const StringRef & s = assert_cast(column).getDataAt(row_num); + encoder.encodeString(s.toString()); + } }; else return {avro::BytesSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) - { - const StringRef & s = assert_cast(column).getDataAt(row_num); - encoder.encodeBytes(reinterpret_cast(s.data), s.size); - } + { + const StringRef & s = 
assert_cast(column).getDataAt(row_num); + encoder.encodeBytes(reinterpret_cast(s.data), s.size); + } }; case TypeIndex::FixedString: { diff --git a/tests/queries/0_stateless/01060_avro.reference b/tests/queries/0_stateless/01060_avro.reference index 338ffe0cf96..7a5aa43a36a 100644 --- a/tests/queries/0_stateless/01060_avro.reference +++ b/tests/queries/0_stateless/01060_avro.reference @@ -58,3 +58,9 @@ not found 0 1000 147 += string column pattern +"русская строка" +Ok +1 0 +1 1 +1 1 diff --git a/tests/queries/0_stateless/01060_avro.sh b/tests/queries/0_stateless/01060_avro.sh index 4779cd64953..78c0c755663 100755 --- a/tests/queries/0_stateless/01060_avro.sh +++ b/tests/queries/0_stateless/01060_avro.sh @@ -89,3 +89,22 @@ ${CLICKHOUSE_LOCAL} -q "select toInt64(number) as a from numbers(1000) format A # type supported via conversion ${CLICKHOUSE_LOCAL} -q "select toInt16(123) as a format Avro" | wc -c | tr -d ' ' + +echo '=' string column pattern +${CLICKHOUSE_LOCAL} -q "select 'русская строка' as a format Avro SETTINGS output_format_avro_string_column_pattern = 'a'" | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "a String" -q 'select * from table' + +# it is expected that invalid UTF-8 can be created +${CLICKHOUSE_LOCAL} -q "select '\x61\xF0\x80\x80\x80b' as a format Avro" > /dev/null && echo Ok + +A_NEEDLE="'\"name\":\"a\",\"type\":\"string\"'" +B_NEEDLE="'\"name\":\"b\",\"type\":\"string\"'" +PATTERNQUERY="select 'русская строка' as a, 'русская строка' as b format Avro SETTINGS output_format_avro_string_column_pattern =" + +PATTERNPATTERN="'a'" +${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" + +PATTERNPATTERN="'a|b'" +${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select 
countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" + +PATTERNPATTERN="'.*'" +${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" From 969458f0430e4035f1983f69275b5631a063bba1 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 12 Jul 2021 21:44:55 +0300 Subject: [PATCH 04/29] Avro string for ClickHouse string minor fixes --- src/Processors/Formats/Impl/AvroRowOutputFormat.cpp | 12 ++++++------ tests/queries/0_stateless/01060_avro.sh | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 96e9ed0ab34..4345e51b8c7 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -55,12 +55,7 @@ namespace ErrorCodes class AvroSerializerTraits { public: - bool isStringAsString(const String & column_name) - { - return RE2::FullMatch(column_name, string_to_string_regexp); - } - - AvroSerializerTraits(const FormatSettings & settings_) + explicit AvroSerializerTraits(const FormatSettings & settings_) : string_to_string_regexp(settings_.avro.string_column_pattern) { if (!string_to_string_regexp.ok()) @@ -70,6 +65,11 @@ public: DB::ErrorCodes::CANNOT_COMPILE_REGEXP); } + bool isStringAsString(const String & column_name) + { + return RE2::FullMatch(column_name, string_to_string_regexp); + } + ~AvroSerializerTraits() { } diff --git a/tests/queries/0_stateless/01060_avro.sh b/tests/queries/0_stateless/01060_avro.sh index 78c0c755663..8c37014a593 100755 --- a/tests/queries/0_stateless/01060_avro.sh +++ b/tests/queries/0_stateless/01060_avro.sh @@ -101,10 +101,10 @@ B_NEEDLE="'\"name\":\"b\",\"type\":\"string\"'" PATTERNQUERY="select 'русская строка' as a, 'русская строка' as 
b format Avro SETTINGS output_format_avro_string_column_pattern =" PATTERNPATTERN="'a'" -${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" +${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | tr -d '\n' | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" PATTERNPATTERN="'a|b'" -${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" +${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | tr -d '\n' | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" PATTERNPATTERN="'.*'" -${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" +${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | tr -d '\n' | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" From e1cc7387be12dae976289e4bfe22c40365d5abe6 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Tue, 13 Jul 2021 11:38:04 +0300 Subject: [PATCH 05/29] Avro string for ClickHouse string make clang tidy happy --- src/Processors/Formats/Impl/AvroRowOutputFormat.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp 
b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 4345e51b8c7..c4ec8736a81 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -70,9 +70,7 @@ public: return RE2::FullMatch(column_name, string_to_string_regexp); } - ~AvroSerializerTraits() - { - } + ~AvroSerializerTraits() = default; private: const RE2 string_to_string_regexp; From 21f2b006cd57ebe8473c0999ac2c501c8cb8960c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 15 Jul 2021 08:17:22 +0300 Subject: [PATCH 06/29] Update test_cte_distributed.py --- .../test_backward_compatibility/test_cte_distributed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_backward_compatibility/test_cte_distributed.py b/tests/integration/test_backward_compatibility/test_cte_distributed.py index 64090b892b8..3aec527524b 100644 --- a/tests/integration/test_backward_compatibility/test_cte_distributed.py +++ b/tests/integration/test_backward_compatibility/test_cte_distributed.py @@ -5,7 +5,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__, name="cte_distributed") node1 = cluster.add_instance('node1', with_zookeeper=False) node2 = cluster.add_instance('node2', - with_zookeeper=False, image='yandex/clickhouse-server', tag='19.16.9.37', stay_alive=True, + with_zookeeper=False, image='yandex/clickhouse-server', tag='21.7.3.14', stay_alive=True, with_installed_binary=True) From c2bb0a02a8ff1b375bab16c2e89969fb34f4ae48 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 15 Jul 2021 11:59:18 +0300 Subject: [PATCH 07/29] Add test. 
--- .../test_create_user_and_login/test.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py index 58a48bde95d..d0edde2233b 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py @@ -1,5 +1,8 @@ import pytest +import time +import logging from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance') @@ -38,3 +41,46 @@ def test_grant_create_user(): instance.query("GRANT CREATE USER ON *.* TO A") instance.query("CREATE USER B", user='A') assert instance.query("SELECT 1", user='B') == "1\n" + + +def test_login_as_dropped_user(): + for _ in range(0, 2): + instance.query("CREATE USER A") + assert instance.query("SELECT 1", user='A') == "1\n" + + instance.query("DROP USER A") + expected_error = "no user with such name" + assert expected_error in instance.query_and_get_error("SELECT 1", user='A') + + +def test_login_as_dropped_user_xml(): + for _ in range(0, 2): + instance.exec_in_container(["bash", "-c" , """ + cat > /etc/clickhouse-server/users.d/user_c.xml << EOF + + + + + + + + +EOF"""]) + + assert_eq_with_retry(instance, "SELECT name FROM system.users WHERE name='C'", "C") + + instance.exec_in_container(["bash", "-c" , "rm /etc/clickhouse-server/users.d/user_c.xml"]) + + expected_error = "no user with such name" + while True: + out, err = instance.query_and_get_answer_with_error("SELECT 1", user='C') + if expected_error in err: + logging.debug(f"Got error '{expected_error}' just as expected") + break + if out == "1\n": + logging.debug(f"Got output '1', retrying...") + time.sleep(0.5) + continue + raise Exception(f"Expected either output '1' or error '{expected_error}', got output={out} and error={err}") + + assert instance.query("SELECT name FROM 
system.users WHERE name='C'") == "" From 398eb9f34a456c263c8406ce147a2d71c0cced22 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 15 Jul 2021 18:01:20 +0300 Subject: [PATCH 08/29] Cleanup JoiningTransform::readExecute --- src/Processors/Transforms/JoiningTransform.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 31b2da46ab3..ef0e81013d1 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -183,11 +183,9 @@ void JoiningTransform::transform(Chunk & chunk) Block JoiningTransform::readExecute(Chunk & chunk) { Block res; - // std::cerr << "=== Chunk rows " << chunk.getNumRows() << " cols " << chunk.getNumColumns() << std::endl; if (!not_processed) { - // std::cerr << "!not_processed " << std::endl; if (chunk.hasColumns()) res = inputs.front().getHeader().cloneWithColumns(chunk.detachColumns()); @@ -196,7 +194,6 @@ Block JoiningTransform::readExecute(Chunk & chunk) } else if (not_processed->empty()) /// There's not processed data inside expression. 
{ - // std::cerr << "not_processed->empty() " << std::endl; if (chunk.hasColumns()) res = inputs.front().getHeader().cloneWithColumns(chunk.detachColumns()); @@ -205,12 +202,10 @@ Block JoiningTransform::readExecute(Chunk & chunk) } else { - // std::cerr << "not not_processed->empty() " << std::endl; res = std::move(not_processed->block); join->joinBlock(res, not_processed); } - // std::cerr << "Res block rows " << res.rows() << " cols " << res.columns() << std::endl; return res; } From e3b3fdac0a3ff64bea58ca07cdbe0853ee7b340f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 15 Jul 2021 18:02:27 +0300 Subject: [PATCH 09/29] small bugfix in Block --- src/Core/Block.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index fa78f052f37..c5e968a1047 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -375,9 +375,13 @@ void Block::setColumn(size_t position, ColumnWithTypeAndName && column) throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Block::setColumn(), max position {}", position, toString(data.size())); - data[position].name = std::move(column.name); - data[position].type = std::move(column.type); - data[position].column = std::move(column.column); + if (data[position].name != column.name) + { + index_by_name.erase(data[position].name); + index_by_name.emplace(column.name, position); + } + + data[position] = std::move(column); } From cf77a3e05a68d04740749567394491639930fe1d Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 15 Jul 2021 18:22:46 +0300 Subject: [PATCH 10/29] Fix infine non joined block stream in merge join --- src/Interpreters/MergeJoin.cpp | 3 +++ .../01943_pmj_non_joined_stuck.reference | 16 ++++++++++++++++ .../01943_pmj_non_joined_stuck.sql | 19 +++++++++++++++++++ 3 files changed, 38 insertions(+) create mode 100644 tests/queries/0_stateless/01943_pmj_non_joined_stuck.reference create mode 100644 
tests/queries/0_stateless/01943_pmj_non_joined_stuck.sql diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 26463c8c6ed..1904ffa332b 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -1053,7 +1053,10 @@ private: } if (rows_added >= max_block_size) + { + ++block_number; break; + } } return rows_added; diff --git a/tests/queries/0_stateless/01943_pmj_non_joined_stuck.reference b/tests/queries/0_stateless/01943_pmj_non_joined_stuck.reference new file mode 100644 index 00000000000..58501cbd0fc --- /dev/null +++ b/tests/queries/0_stateless/01943_pmj_non_joined_stuck.reference @@ -0,0 +1,16 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01943_pmj_non_joined_stuck.sql b/tests/queries/0_stateless/01943_pmj_non_joined_stuck.sql new file mode 100644 index 00000000000..ad7331ee2db --- /dev/null +++ b/tests/queries/0_stateless/01943_pmj_non_joined_stuck.sql @@ -0,0 +1,19 @@ +SET max_block_size = 6, join_algorithm = 'partial_merge'; + +SELECT count() == 4 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 5 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 6 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 7 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 8 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3]) AS s) AS js2 USING (s); +SELECT count() == 9 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 10 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 11 FROM (SELECT 1 AS s) AS js1 
ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 12 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 13 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 14 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 15 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3]) AS s) AS js2 USING (s); + +SELECT count() == 8 FROM (SELECT 1 AS s) AS js1 FULL JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 9 FROM (SELECT 1 AS s) AS js1 FULL JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3]) AS s) AS js2 USING (s); +SELECT count() == 10 FROM (SELECT 1 AS s) AS js1 FULL JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 11 FROM (SELECT 1 AS s) AS js1 FULL JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3]) AS s) AS js2 USING (s); From 70ec444400e66d840bbbf38835517e6d9b90e703 Mon Sep 17 00:00:00 2001 From: zxc111 Date: Thu, 15 Jul 2021 23:58:44 +0800 Subject: [PATCH 11/29] update operations/settings/merge-tree-settings.md --- docs/en/operations/settings/merge-tree-settings.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 791ac344bcf..a4fd0869629 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -278,4 +278,16 @@ Possible values: Default value: `0`. +## check_sample_column_is_correct {#check_sample_column_is_correct} + +Enables to check column or column by hash for sampling is correct at creation table. 
+ +Possible values: + +- true — Check column for sampling is correct at creation table +- false — Check column for sampling is correct at creation table will be ignored. + +Default value: `true`. + +By default, the ClickHouse server check sampling condition at creation table. If you already had tables with incorrect sampling condition, set value `false` to make ClickHouse server do not raise exception when ClickHouse server is starting. [Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) From 6c74b4472969c0d43448400ec80ea39cf7322a89 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 15 Jul 2021 11:55:51 +0300 Subject: [PATCH 12/29] Fix possible crash when login as dropped user. --- src/Access/AccessControlManager.cpp | 7 ++- src/Access/ContextAccess.cpp | 54 +++++++++++++------ src/Access/ContextAccess.h | 5 -- src/Access/EnabledRowPolicies.cpp | 7 ++- src/Access/EnabledRowPolicies.h | 1 + src/Access/SettingsConstraints.cpp | 13 +++-- src/Access/SettingsConstraints.h | 1 + src/Interpreters/Context.cpp | 12 ++--- src/Storages/System/StorageSystemSettings.cpp | 3 +- 9 files changed, 66 insertions(+), 37 deletions(-) diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 66023c1c0ea..7ae5eeb8288 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -64,7 +64,12 @@ public: std::lock_guard lock{mutex}; auto x = cache.get(params); if (x) - return *x; + { + if ((*x)->getUser()) + return *x; + /// No user, probably the user has been dropped while it was in the cache. 
+ cache.remove(params); + } auto res = std::shared_ptr(new ContextAccess(manager, params)); cache.add(params, res); return res; diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 90495a83dfc..697e1ce39f5 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -163,11 +163,10 @@ void ContextAccess::setUser(const UserPtr & user_) const if (!user) { /// User has been dropped. - auto nothing_granted = std::make_shared(); - access = nothing_granted; - access_with_implicit = nothing_granted; subscription_for_user_change = {}; subscription_for_roles_changes = {}; + access = nullptr; + access_with_implicit = nullptr; enabled_roles = nullptr; roles_info = nullptr; enabled_row_policies = nullptr; @@ -252,32 +251,45 @@ String ContextAccess::getUserName() const std::shared_ptr ContextAccess::getRolesInfo() const { std::lock_guard lock{mutex}; - return roles_info; + if (roles_info) + return roles_info; + static const auto no_roles = std::make_shared(); + return no_roles; } std::shared_ptr ContextAccess::getEnabledRowPolicies() const { std::lock_guard lock{mutex}; - return enabled_row_policies; + if (enabled_row_policies) + return enabled_row_policies; + static const auto no_row_policies = std::make_shared(); + return no_row_policies; } ASTPtr ContextAccess::getRowPolicyCondition(const String & database, const String & table_name, RowPolicy::ConditionType index, const ASTPtr & extra_condition) const { std::lock_guard lock{mutex}; - return enabled_row_policies ? 
enabled_row_policies->getCondition(database, table_name, index, extra_condition) : nullptr; + if (enabled_row_policies) + return enabled_row_policies->getCondition(database, table_name, index, extra_condition); + return nullptr; } std::shared_ptr ContextAccess::getQuota() const { std::lock_guard lock{mutex}; - return enabled_quota; + if (enabled_quota) + return enabled_quota; + static const auto unlimited_quota = EnabledQuota::getUnlimitedQuota(); + return unlimited_quota; } std::optional ContextAccess::getQuotaUsage() const { std::lock_guard lock{mutex}; - return enabled_quota ? enabled_quota->getUsage() : std::optional{}; + if (enabled_quota) + return enabled_quota->getUsage(); + return {}; } @@ -288,7 +300,7 @@ std::shared_ptr ContextAccess::getFullAccess() auto full_access = std::shared_ptr(new ContextAccess); full_access->is_full_access = true; full_access->access = std::make_shared(AccessRights::getFullAccess()); - full_access->enabled_quota = EnabledQuota::getUnlimitedQuota(); + full_access->access_with_implicit = std::make_shared(addImplicitAccessRights(*full_access->access)); return full_access; }(); return res; @@ -298,28 +310,40 @@ std::shared_ptr ContextAccess::getFullAccess() std::shared_ptr ContextAccess::getDefaultSettings() const { std::lock_guard lock{mutex}; - return enabled_settings ? enabled_settings->getSettings() : nullptr; + if (enabled_settings) + return enabled_settings->getSettings(); + static const auto everything_by_default = std::make_shared(); + return everything_by_default; } std::shared_ptr ContextAccess::getSettingsConstraints() const { std::lock_guard lock{mutex}; - return enabled_settings ? 
enabled_settings->getConstraints() : nullptr; + if (enabled_settings) + return enabled_settings->getConstraints(); + static const auto no_constraints = std::make_shared(); + return no_constraints; } std::shared_ptr ContextAccess::getAccessRights() const { std::lock_guard lock{mutex}; - return access; + if (access) + return access; + static const auto nothing_granted = std::make_shared(); + return nothing_granted; } std::shared_ptr ContextAccess::getAccessRightsWithImplicit() const { std::lock_guard lock{mutex}; - return access_with_implicit; + if (access_with_implicit) + return access_with_implicit; + static const auto nothing_granted = std::make_shared(); + return nothing_granted; } @@ -551,7 +575,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const for (auto it = std::begin(role_ids); it != std::end(role_ids); ++it, ++i) { const UUID & role_id = *it; - if (info && info->enabled_roles_with_admin_option.count(role_id)) + if (info->enabled_roles_with_admin_option.count(role_id)) continue; if (throw_if_denied) @@ -560,7 +584,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const if (!role_name) role_name = "ID {" + toString(role_id) + "}"; - if (info && info->enabled_roles.count(role_id)) + if (info->enabled_roles.count(role_id)) show_error("Not enough privileges. " "Role " + backQuote(*role_name) + " is granted, but without ADMIN option. " "To execute this query it's necessary to have the role " + backQuoteIfNeed(*role_name) + " granted with ADMIN option.", diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index a4373be4ff0..c7c4726c535 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -71,11 +71,9 @@ public: String getUserName() const; /// Returns information about current and enabled roles. - /// The function can return nullptr. std::shared_ptr getRolesInfo() const; /// Returns information about enabled row policies. - /// The function can return nullptr. 
std::shared_ptr getEnabledRowPolicies() const; /// Returns the row policy filter for a specified table. @@ -83,16 +81,13 @@ public: ASTPtr getRowPolicyCondition(const String & database, const String & table_name, RowPolicy::ConditionType index, const ASTPtr & extra_condition = nullptr) const; /// Returns the quota to track resource consumption. - /// The function returns nullptr if no tracking or limitation is needed. std::shared_ptr getQuota() const; std::optional getQuotaUsage() const; /// Returns the default settings, i.e. the settings to apply on user's login. - /// The function returns nullptr if it's no need to apply settings. std::shared_ptr getDefaultSettings() const; /// Returns the settings' constraints. - /// The function returns nullptr if there are no constraints. std::shared_ptr getSettingsConstraints() const; /// Returns the current access rights. diff --git a/src/Access/EnabledRowPolicies.cpp b/src/Access/EnabledRowPolicies.cpp index efd5ed4ae10..674dab3e0f0 100644 --- a/src/Access/EnabledRowPolicies.cpp +++ b/src/Access/EnabledRowPolicies.cpp @@ -12,8 +12,11 @@ size_t EnabledRowPolicies::Hash::operator()(const MixedConditionKey & key) const } -EnabledRowPolicies::EnabledRowPolicies(const Params & params_) - : params(params_) +EnabledRowPolicies::EnabledRowPolicies() : params() +{ +} + +EnabledRowPolicies::EnabledRowPolicies(const Params & params_) : params(params_) { } diff --git a/src/Access/EnabledRowPolicies.h b/src/Access/EnabledRowPolicies.h index 0ca4f16fcf1..5e819733963 100644 --- a/src/Access/EnabledRowPolicies.h +++ b/src/Access/EnabledRowPolicies.h @@ -32,6 +32,7 @@ public: friend bool operator >=(const Params & lhs, const Params & rhs) { return !(lhs < rhs); } }; + EnabledRowPolicies(); ~EnabledRowPolicies(); using ConditionType = RowPolicy::ConditionType; diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index 316f869fc79..988900e57d2 100644 --- a/src/Access/SettingsConstraints.cpp +++ 
b/src/Access/SettingsConstraints.cpp @@ -18,6 +18,8 @@ namespace ErrorCodes } +SettingsConstraints::SettingsConstraints() = default; + SettingsConstraints::SettingsConstraints(const AccessControlManager & manager_) : manager(&manager_) { } @@ -199,10 +201,13 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh } }; - if (reaction == THROW_ON_VIOLATION) - manager->checkSettingNameIsAllowed(setting_name); - else if (!manager->isSettingNameAllowed(setting_name)) - return false; + if (manager) + { + if (reaction == THROW_ON_VIOLATION) + manager->checkSettingNameIsAllowed(setting_name); + else if (!manager->isSettingNameAllowed(setting_name)) + return false; + } Field current_value, new_value; if (current_settings.tryGet(setting_name, current_value)) diff --git a/src/Access/SettingsConstraints.h b/src/Access/SettingsConstraints.h index 4259fe15e25..cdec2bb293c 100644 --- a/src/Access/SettingsConstraints.h +++ b/src/Access/SettingsConstraints.h @@ -51,6 +51,7 @@ class AccessControlManager; class SettingsConstraints { public: + SettingsConstraints(); SettingsConstraints(const AccessControlManager & manager_); SettingsConstraints(const SettingsConstraints & src); SettingsConstraints & operator =(const SettingsConstraints & src); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index cbf2c0820f5..ccb356a4ca7 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1198,26 +1198,22 @@ void Context::applySettingsChanges(const SettingsChanges & changes) void Context::checkSettingsConstraints(const SettingChange & change) const { - if (auto settings_constraints = getSettingsConstraints()) - settings_constraints->check(settings, change); + getSettingsConstraints()->check(settings, change); } void Context::checkSettingsConstraints(const SettingsChanges & changes) const { - if (auto settings_constraints = getSettingsConstraints()) - settings_constraints->check(settings, changes); + 
getSettingsConstraints()->check(settings, changes); } void Context::checkSettingsConstraints(SettingsChanges & changes) const { - if (auto settings_constraints = getSettingsConstraints()) - settings_constraints->check(settings, changes); + getSettingsConstraints()->check(settings, changes); } void Context::clampToSettingsConstraints(SettingsChanges & changes) const { - if (auto settings_constraints = getSettingsConstraints()) - settings_constraints->clamp(settings, changes); + getSettingsConstraints()->clamp(settings, changes); } std::shared_ptr Context::getSettingsConstraints() const diff --git a/src/Storages/System/StorageSystemSettings.cpp b/src/Storages/System/StorageSystemSettings.cpp index 1aca7e45190..d90ca27cbc0 100644 --- a/src/Storages/System/StorageSystemSettings.cpp +++ b/src/Storages/System/StorageSystemSettings.cpp @@ -40,8 +40,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co Field min, max; bool read_only = false; - if (settings_constraints) - settings_constraints->get(setting_name, min, max, read_only); + settings_constraints->get(setting_name, min, max, read_only); /// These two columns can accept strings only. 
if (!min.isNull()) From dd519f943427036d360ec994e3d7e0d66179d454 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Jul 2021 19:02:31 +0300 Subject: [PATCH 13/29] Fix --- src/Databases/SQLite/SQLiteUtils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/SQLite/SQLiteUtils.h b/src/Databases/SQLite/SQLiteUtils.h index d405e869b85..56ca1ca8cf3 100644 --- a/src/Databases/SQLite/SQLiteUtils.h +++ b/src/Databases/SQLite/SQLiteUtils.h @@ -7,7 +7,7 @@ #if USE_SQLITE #include #include -#include +#include // Y_IGNORE namespace DB From a7edcb0de3ca19e4b5e77962568341d6719a080c Mon Sep 17 00:00:00 2001 From: zxc111 Date: Fri, 16 Jul 2021 00:06:40 +0800 Subject: [PATCH 14/29] update engines/table-engines/mergetree-family/mergetree.md --- .../engines/table-engines/mergetree-family/mergetree.md | 2 +- docs/en/operations/settings/merge-tree-settings.md | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 9d259456ea5..f0cdd75f90d 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -76,7 +76,7 @@ For a description of parameters, see the [CREATE query description](../../../sql - `SAMPLE BY` — An expression for sampling. Optional. - If a sampling expression is used, the primary key must contain it. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. + If a sampling expression is used, the primary key must contain it. The result of sampling expression must be unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. - `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional. 
diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index a4fd0869629..6c98187ebbf 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -280,14 +280,14 @@ Default value: `0`. ## check_sample_column_is_correct {#check_sample_column_is_correct} -Enables to check column or column by hash for sampling is correct at creation table. +Enables checking that the sampling column or sampling expression is correct at table creation. Possible values: -- true — Check column for sampling is correct at creation table -- false — Check column for sampling is correct at creation table will be ignored. +- true — The sampling column or sampling expression is checked at table creation. +- false — The check of the sampling column or sampling expression at table creation is skipped. Default value: `true`. -By default, the ClickHouse server check sampling condition at creation table. If you already had tables with incorrect sampling condition, set value `false` to make ClickHouse server do not raise exception when ClickHouse server is starting. +By default, the ClickHouse server checks the sampling column or sampling expression at table creation. If you already have tables with an incorrect sampling expression, set the value to `false` so that the ClickHouse server does not raise an exception on startup. [Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) From 02fd365485626464988490640fe02cb61f70f11f Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 16 Jul 2021 00:29:27 +0300 Subject: [PATCH 15/29] Fix calculating of intersection of access rights.
--- src/Access/AccessRights.cpp | 18 ++-- src/Access/tests/gtest_access_rights_ops.cpp | 94 ++++++++++++++++++++ src/Interpreters/InterpreterGrantQuery.cpp | 64 ++++++++++--- 3 files changed, 154 insertions(+), 22 deletions(-) create mode 100644 src/Access/tests/gtest_access_rights_ops.cpp diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index f9c1d23350d..d4b2dc8a252 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -655,7 +655,7 @@ private: for (auto & [lhs_childname, lhs_child] : *children) { if (!rhs.tryGetChild(lhs_childname)) - lhs_child.flags |= rhs.flags & lhs_child.getAllGrantableFlags(); + lhs_child.addGrantsRec(rhs.flags); } } } @@ -673,7 +673,7 @@ private: for (auto & [lhs_childname, lhs_child] : *children) { if (!rhs.tryGetChild(lhs_childname)) - lhs_child.flags &= rhs.flags; + lhs_child.removeGrantsRec(~rhs.flags); } } } @@ -1041,17 +1041,15 @@ void AccessRights::makeIntersection(const AccessRights & other) auto helper = [](std::unique_ptr & root_node, const std::unique_ptr & other_root_node) { if (!root_node) + return; + if (!other_root_node) { - if (other_root_node) - root_node = std::make_unique(*other_root_node); + root_node = nullptr; return; } - if (other_root_node) - { - root_node->makeIntersection(*other_root_node); - if (!root_node->flags && !root_node->children) - root_node = nullptr; - } + root_node->makeIntersection(*other_root_node); + if (!root_node->flags && !root_node->children) + root_node = nullptr; }; helper(root, other.root); helper(root_with_grant_option, other.root_with_grant_option); diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp new file mode 100644 index 00000000000..3d7b396a6f2 --- /dev/null +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -0,0 +1,94 @@ +#include +#include + +using namespace DB; + + +TEST(AccessRights, Union) +{ + AccessRights lhs, rhs; + lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + 
rhs.grant(AccessType::SELECT, "db2"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT CREATE TABLE ON db1.tb1, GRANT SELECT ON db2.*"); + + lhs.clear(); + rhs.clear(); + rhs.grant(AccessType::SELECT, "db2"); + lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT CREATE TABLE ON db1.tb1, GRANT SELECT ON db2.*"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT); + rhs.grant(AccessType::SELECT, "db1", "tb1"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT ON *.*"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col1, col2, col3) ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grantWithGrantOption(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col1) ON db1.tb1, GRANT SELECT(col2, col3) ON db1.tb1 WITH GRANT OPTION"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::INSERT); + rhs.grant(AccessType::ALL, "db1"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*"); +} + + +TEST(AccessRights, Intersection) +{ + AccessRights lhs, rhs; + lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + rhs.grant(AccessType::SELECT, "db2"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT USAGE ON *.*"); + + lhs.clear(); + rhs.clear(); + lhs.grant(AccessType::SELECT, "db2"); + 
rhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT USAGE ON *.*"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT); + rhs.grant(AccessType::SELECT, "db1", "tb1"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col2) ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grantWithGrantOption(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col2) ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::INSERT); + rhs.grant(AccessType::ALL, "db1"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT INSERT ON db1.*"); +} diff --git a/src/Interpreters/InterpreterGrantQuery.cpp b/src/Interpreters/InterpreterGrantQuery.cpp index 7487ca79bde..d5fcb82d408 100644 --- a/src/Interpreters/InterpreterGrantQuery.cpp +++ b/src/Interpreters/InterpreterGrantQuery.cpp @@ -93,24 +93,28 @@ namespace const AccessControlManager & access_control, const ContextAccess & access, const ASTGrantQuery & query, - const std::vector & grantees_from_query) + const std::vector & grantees_from_query, + bool & need_check_grantees_are_allowed) { const auto & elements = query.access_rights_elements; + need_check_grantees_are_allowed = true; if (elements.empty()) + { + /// No access rights to grant or revoke. + need_check_grantees_are_allowed = false; return; + } - /// To execute the command GRANT the current user needs to have the access granted - /// with GRANT OPTION. if (!query.is_revoke) { + /// To execute the command GRANT the current user needs to have the access granted with GRANT OPTION. 
access.checkGrantOption(elements); - checkGranteesAreAllowed(access_control, access, grantees_from_query); return; } if (access.hasGrantOption(elements)) { - checkGranteesAreAllowed(access_control, access, grantees_from_query); + /// Simple case: the current user has the grant option for all the access rights specified for REVOKE. return; } @@ -137,6 +141,7 @@ namespace all_granted_access.makeUnion(user->access); } } + need_check_grantees_are_allowed = false; /// already checked AccessRights required_access; if (elements[0].is_partial_revoke) @@ -158,21 +163,28 @@ namespace } } - std::vector getRoleIDsAndCheckAdminOption( const AccessControlManager & access_control, const ContextAccess & access, const ASTGrantQuery & query, const RolesOrUsersSet & roles_from_query, - const std::vector & grantees_from_query) + const std::vector & grantees_from_query, + bool & need_check_grantees_are_allowed) { - std::vector matching_ids; + need_check_grantees_are_allowed = true; + if (roles_from_query.empty()) + { + /// No roles to grant or revoke. + need_check_grantees_are_allowed = false; + return {}; + } + std::vector matching_ids; if (!query.is_revoke) { + /// To execute the command GRANT the current user needs to have the roles granted with ADMIN OPTION. matching_ids = roles_from_query.getMatchingIDs(access_control); access.checkAdminOption(matching_ids); - checkGranteesAreAllowed(access_control, access, grantees_from_query); return matching_ids; } @@ -181,7 +193,7 @@ namespace matching_ids = roles_from_query.getMatchingIDs(); if (access.hasAdminOption(matching_ids)) { - checkGranteesAreAllowed(access_control, access, grantees_from_query); + /// Simple case: the current user has the admin option for all the roles specified for REVOKE. return matching_ids; } } @@ -209,6 +221,7 @@ namespace all_granted_roles.makeUnion(user->granted_roles); } } + need_check_grantees_are_allowed = false; /// already checked const auto & all_granted_roles_set = query.admin_option ? 
all_granted_roles.getGrantedWithAdminOption() : all_granted_roles.getGranted(); if (roles_from_query.all) @@ -218,6 +231,33 @@ namespace access.checkAdminOption(matching_ids); return matching_ids; } + + void checkGrantOptionAndGrantees( + const AccessControlManager & access_control, + const ContextAccess & access, + const ASTGrantQuery & query, + const std::vector & grantees_from_query) + { + bool need_check_grantees_are_allowed = true; + checkGrantOption(access_control, access, query, grantees_from_query, need_check_grantees_are_allowed); + if (need_check_grantees_are_allowed) + checkGranteesAreAllowed(access_control, access, grantees_from_query); + } + + std::vector getRoleIDsAndCheckAdminOptionAndGrantees( + const AccessControlManager & access_control, + const ContextAccess & access, + const ASTGrantQuery & query, + const RolesOrUsersSet & roles_from_query, + const std::vector & grantees_from_query) + { + bool need_check_grantees_are_allowed = true; + auto role_ids = getRoleIDsAndCheckAdminOption( + access_control, access, query, roles_from_query, grantees_from_query, need_check_grantees_are_allowed); + if (need_check_grantees_are_allowed) + checkGranteesAreAllowed(access_control, access, grantees_from_query); + return role_ids; + } } @@ -243,7 +283,7 @@ BlockIO InterpreterGrantQuery::execute() /// Check if the current user has corresponding roles granted with admin option. std::vector roles; if (roles_set) - roles = getRoleIDsAndCheckAdminOption(access_control, *getContext()->getAccess(), query, *roles_set, grantees); + roles = getRoleIDsAndCheckAdminOptionAndGrantees(access_control, *getContext()->getAccess(), query, *roles_set, grantees); if (!query.cluster.empty()) { @@ -258,7 +298,7 @@ BlockIO InterpreterGrantQuery::execute() /// Check if the current user has corresponding access rights with grant option. 
if (!query.access_rights_elements.empty()) - checkGrantOption(access_control, *getContext()->getAccess(), query, grantees); + checkGrantOptionAndGrantees(access_control, *getContext()->getAccess(), query, grantees); /// Update roles and users listed in `grantees`. auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr From 9e4e871a3bd85ed513b63dff4b9e0e7d5e579ef0 Mon Sep 17 00:00:00 2001 From: vgocoder Date: Fri, 16 Jul 2021 10:07:16 +0800 Subject: [PATCH 16/29] Update atomic.md --- docs/zh/engines/database-engines/atomic.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/zh/engines/database-engines/atomic.md b/docs/zh/engines/database-engines/atomic.md index f019b94a00b..73e044b5e98 100644 --- a/docs/zh/engines/database-engines/atomic.md +++ b/docs/zh/engines/database-engines/atomic.md @@ -6,12 +6,12 @@ toc_title: Atomic # Atomic {#atomic} -It is supports non-blocking `DROP` and `RENAME TABLE` queries and atomic `EXCHANGE TABLES t1 AND t2` queries. Atomic database engine is used by default. 
+它支持非阻塞 DROP 和 RENAME TABLE 查询以及原子 EXCHANGE TABLES t1 AND t2 查询。默认情况下使用Atomic数据库引擎。 -## Creating a Database {#creating-a-database} +## 创建数据库 {#creating-a-database} ```sql CREATE DATABASE test ENGINE = Atomic; ``` -[Original article](https://clickhouse.tech/docs/en/engines/database_engines/atomic/) +[原文](https://clickhouse.tech/docs/en/engines/database_engines/atomic/) From d14b2ed483cf290cd3d33950aec746cc75d0fe9b Mon Sep 17 00:00:00 2001 From: vgocoder Date: Fri, 16 Jul 2021 10:19:34 +0800 Subject: [PATCH 17/29] Update versionedcollapsingmergetree.md --- .../mergetree-family/versionedcollapsingmergetree.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 3b89da9f595..dc9871c1a31 100644 --- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -3,7 +3,7 @@ toc_priority: 37 toc_title: "版本折叠MergeTree" --- -# 版本折叠MergeTree {#versionedcollapsingmergetree} +# VersionedCollapsingMergeTree {#versionedcollapsingmergetree} 这个引擎: From 67ba94dee144b4dcf2da441ad775655adf1ff118 Mon Sep 17 00:00:00 2001 From: vgocoder Date: Fri, 16 Jul 2021 10:20:58 +0800 Subject: [PATCH 18/29] Update collapsingmergetree.md --- .../table-engines/mergetree-family/collapsingmergetree.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md index 6d1dfac7686..6fb57dc19d9 100644 --- a/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -1,4 +1,4 @@ -# 折叠树 {#table_engine-collapsingmergetree} +# CollapsingMergeTree {#table_engine-collapsingmergetree} 该引擎继承于 
[MergeTree](mergetree.md),并在数据块合并算法中添加了折叠行的逻辑。 @@ -203,4 +203,4 @@ SELECT * FROM UAct FINAL 这种查询数据的方法是非常低效的。不要在大表中使用它。 -[来源文章](https://clickhouse.tech/docs/en/operations/table_engines/collapsingmergetree/) +[原文](https://clickhouse.tech/docs/en/operations/table_engines/collapsingmergetree/) From 178ed3679e68b557f95c62f861f876b8a965e084 Mon Sep 17 00:00:00 2001 From: vgocoder Date: Fri, 16 Jul 2021 10:32:16 +0800 Subject: [PATCH 19/29] Update index.md --- docs/zh/operations/index.md | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/docs/zh/operations/index.md b/docs/zh/operations/index.md index f35858279f5..5139f083ceb 100644 --- a/docs/zh/operations/index.md +++ b/docs/zh/operations/index.md @@ -5,9 +5,21 @@ toc_title: "操作" # 操作 {#operations} -Clickhouse运维手册主要包含下面几部分: +ClickHouse操作手册由以下主要部分组成: -- 安装要求 +- [安装要求](../operations/requirements.md) +- [监控](../operations/monitoring.md) +- [故障排除](../operations/troubleshooting.md) +- [使用建议](../operations/tips.md) +- [更新程序](../operations/update.md) +- [访问权限](../operations/access-rights.md) +- [数据备份](../operations/backup.md) +- [配置文件](../operations/configuration-files.md) +- [配额](../operations/quotas.md) +- [系统表](../operations/system-tables/index.md) +- [服务器配置参数](../operations/server-configuration-parameters/index.md) +- [如何用ClickHouse测试你的硬件](../operations/performance-test.md) +- [设置](../operations/settings/index.md) +- [实用工具](../operations/utilities/index.md) - -[原始文章](https://clickhouse.tech/docs/en/operations/) +[原文](https://clickhouse.tech/docs/en/operations/) From 72ef21437939a823721e0c3a290177eb176cfe59 Mon Sep 17 00:00:00 2001 From: vgocoder Date: Fri, 16 Jul 2021 10:43:31 +0800 Subject: [PATCH 20/29] Update tcp.md --- docs/zh/interfaces/tcp.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/zh/interfaces/tcp.md b/docs/zh/interfaces/tcp.md index b779b9fea40..571fd22b758 100644 --- a/docs/zh/interfaces/tcp.md +++ b/docs/zh/interfaces/tcp.md @@ -5,6 +5,6 @@ 
toc_title: 原生接口(TCP) # 原生接口(TCP){#native-interface-tcp} -原生接口用于[命令行客户端](cli.md),用于分布式查询处理期间的服务器间通信,以及其他C++程序。可惜的是,原生的ClickHouse协议还没有正式的规范,但它可以从ClickHouse[源代码](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)通过拦截和分析TCP流量进行反向工程。 +原生接口协议用于[命令行客户端](cli.md),用于分布式查询处理期间的服务器间通信,以及其他C++ 程序。不幸的是,原生ClickHouse协议还没有正式的规范,但它可以从ClickHouse源代码[从这里开始](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)或通过拦截和分析TCP流量进行逆向工程。 -[来源文章](https://clickhouse.tech/docs/zh/interfaces/tcp/) +[原文](https://clickhouse.tech/docs/en/interfaces/tcp/) From 924319dc321dc16d8211d72343d2194e686eb625 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 16 Jul 2021 10:32:02 +0300 Subject: [PATCH 21/29] Less logging in AsynchronousMetrics --- src/Interpreters/AsynchronousMetrics.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index da514759eb5..6b2940154f8 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -546,13 +546,16 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti Int64 peak = total_memory_tracker.getPeak(); Int64 new_amount = data.resident; - LOG_DEBUG(&Poco::Logger::get("AsynchronousMetrics"), - "MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}", - ReadableSize(amount), - ReadableSize(peak), - ReadableSize(new_amount), - ReadableSize(new_amount - amount) - ); + Int64 difference = new_amount - amount; + + /// Log only if difference is high. This is for convenience. The threshold is arbitrary. 
+ if (difference >= 1048576 || difference <= -1048576) + LOG_TRACE(&Poco::Logger::get("AsynchronousMetrics"), + "MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}", + ReadableSize(amount), + ReadableSize(peak), + ReadableSize(new_amount), + ReadableSize(difference)); total_memory_tracker.set(new_amount); CurrentMetrics::set(CurrentMetrics::MemoryTracking, new_amount); From 4dfd6ddcdb138adb11a70fb43b70c17500614149 Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Thu, 15 Jul 2021 16:06:25 +0300 Subject: [PATCH 22/29] Catch Poco exception in resolveIPAddressImpl --- src/Common/DNSResolver.cpp | 16 ++++++++++++++-- .../00965_shard_unresolvable_addresses.sql | 2 +- ...01946_test_wrong_host_name_access.reference | 2 ++ .../01946_test_wrong_host_name_access.sh | 18 ++++++++++++++++++ tests/queries/skip_list.json | 3 ++- 5 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/01946_test_wrong_host_name_access.reference create mode 100755 tests/queries/0_stateless/01946_test_wrong_host_name_access.sh diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 8b006bc550d..4fe0f0bb8c8 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -109,11 +109,23 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) /// It should not affect client address checking, since client cannot connect from IPv6 address /// if server has no IPv6 addresses. 
flags |= Poco::Net::DNS::DNS_HINT_AI_ADDRCONFIG; + + DNSResolver::IPAddresses addresses; + + try + { #if defined(ARCADIA_BUILD) - auto addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses(); + addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses(); #else - auto addresses = Poco::Net::DNS::hostByName(host, flags).addresses(); + addresses = Poco::Net::DNS::hostByName(host, flags).addresses(); #endif + } + catch (const Poco::Net::DNSException & e) + { + LOG_ERROR(&Poco::Logger::get("DNSResolver"), "Cannot resolve host ({}), error {}: {}.", host, e.code(), e.message()); + addresses.clear(); + } + if (addresses.empty()) throw Exception("Not found address of host: " + host, ErrorCodes::DNS_ERROR); diff --git a/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql b/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql index b6b981c7d00..6d57cd0447d 100644 --- a/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql +++ b/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql @@ -1,2 +1,2 @@ -SELECT count() FROM remote('127.0.0.1,localhos', system.one); -- { serverError 279 } +SELECT count() FROM remote('127.0.0.1,localhos', system.one); -- { serverError 198 } SELECT count() FROM remote('127.0.0.1|localhos', system.one); diff --git a/tests/queries/0_stateless/01946_test_wrong_host_name_access.reference b/tests/queries/0_stateless/01946_test_wrong_host_name_access.reference new file mode 100644 index 00000000000..1191247b6d9 --- /dev/null +++ b/tests/queries/0_stateless/01946_test_wrong_host_name_access.reference @@ -0,0 +1,2 @@ +1 +2 diff --git a/tests/queries/0_stateless/01946_test_wrong_host_name_access.sh b/tests/queries/0_stateless/01946_test_wrong_host_name_access.sh new file mode 100755 index 00000000000..288a3438dc9 --- /dev/null +++ b/tests/queries/0_stateless/01946_test_wrong_host_name_access.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env 
bash + +MYHOSTNAME=$(hostname -f) + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery --query " + DROP USER IF EXISTS dns_fail_1, dns_fail_2; + CREATE USER dns_fail_1 HOST NAME 'non.existing.host.name', '${MYHOSTNAME}'; + CREATE USER dns_fail_2 HOST NAME '${MYHOSTNAME}', 'non.existing.host.name';" + +${CLICKHOUSE_CLIENT} --query "SELECT 1" --user dns_fail_1 --host ${MYHOSTNAME} + +${CLICKHOUSE_CLIENT} --query "SELECT 2" --user dns_fail_2 --host ${MYHOSTNAME} + +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS dns_fail_1, dns_fail_2" diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index fd800d3bc33..8bdb97cf350 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -514,6 +514,7 @@ "01915_create_or_replace_dictionary", "01925_test_storage_merge_aliases", "01933_client_replxx_convert_history", /// Uses non unique history file - "01902_table_function_merge_db_repr" + "01902_table_function_merge_db_repr", + "01946_test_wrong_host_name_access" ] } From 58f2f344add1d8508590327127f850b57aa9ffe9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 16 Jul 2021 10:53:30 +0300 Subject: [PATCH 23/29] Whitespaces --- src/Processors/Transforms/WindowTransform.h | 24 +++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h index 611b03ebf72..d7211f9edd7 100644 --- a/src/Processors/Transforms/WindowTransform.h +++ b/src/Processors/Transforms/WindowTransform.h @@ -139,7 +139,9 @@ public: } const Columns & inputAt(const RowNumber & x) const - { return const_cast<WindowTransform *>(this)->inputAt(x); } + { + return const_cast<WindowTransform *>(this)->inputAt(x); + } auto & blockAt(const uint64_t block_number) { @@ -149,13 +151,19 @@ public: } const auto & blockAt(const uint64_t block_number) const - { return 
const_cast<WindowTransform *>(this)->blockAt(block_number); } + { + return const_cast<WindowTransform *>(this)->blockAt(block_number); + } auto & blockAt(const RowNumber & x) - { return blockAt(x.block); } + { + return blockAt(x.block); + } const auto & blockAt(const RowNumber & x) const - { return const_cast<WindowTransform *>(this)->blockAt(x); } + { + return const_cast<WindowTransform *>(this)->blockAt(x); + } size_t blockRowsNumber(const RowNumber & x) const { @@ -225,10 +233,14 @@ public: } RowNumber blocksEnd() const - { return RowNumber{first_block_number + blocks.size(), 0}; } + { + return RowNumber{first_block_number + blocks.size(), 0}; + } RowNumber blocksBegin() const - { return RowNumber{first_block_number, 0}; } + { + return RowNumber{first_block_number, 0}; + } public: /* From 813c37b9fe3f63058892b7e13d94df5373e6f9ac Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 16 Jul 2021 10:57:55 +0300 Subject: [PATCH 24/29] Update merge-tree-settings.md --- docs/en/operations/settings/merge-tree-settings.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 6c98187ebbf..2acfa13c611 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -280,14 +280,14 @@ Default value: `0`. ## check_sample_column_is_correct {#check_sample_column_is_correct} -Enables to check column for sampling or sampling expression is correct at creation table. +Enables to check column for sampling or sampling expression is correct at table creation. Possible values: -- true — Check column or sampling expression is correct at creation table -- false — Check column or sampling expression is at creation table will be ignored. +- true — Check column or sampling expression is correct during table creation +- false — Check column or sampling expression is at table creation will be ignored. Default value: `true`. 
-By default, the ClickHouse server check column for sampling or sampling expression at creation table. If you already had tables with incorrect sampling expression, set value `false` to make ClickHouse server do not raise exception when ClickHouse server is starting. +By default, the ClickHouse server check column for sampling or sampling expression at table creation. If you already had tables with incorrect sampling expression, set value `false` to make ClickHouse server do not raise exception when ClickHouse server is starting. [Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) From 5a80cbb6ccde8f0b2740e9a4fd993c2fc4cdfa1b Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 16 Jul 2021 10:59:29 +0300 Subject: [PATCH 25/29] Update merge-tree-settings.md --- docs/en/operations/settings/merge-tree-settings.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 2acfa13c611..a17d866f550 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -284,8 +284,8 @@ Enables to check column for sampling or sampling expression is correct at table Possible values: -- true — Check column or sampling expression is correct during table creation -- false — Check column or sampling expression is at table creation will be ignored. +- true — Check column or sampling expression is correct at table creation +- false — Check column or sampling expression is correct at table creation will be ignored. Default value: `true`. 
From 76e3bd87bca51803ba2ce4e09f0bb437b733a52f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 16 Jul 2021 11:00:36 +0300 Subject: [PATCH 26/29] Update merge-tree-settings.md --- docs/en/operations/settings/merge-tree-settings.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index a17d866f550..9ace094a4d8 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -284,8 +284,8 @@ Enables to check column for sampling or sampling expression is correct at table Possible values: -- true — Check column or sampling expression is correct at table creation -- false — Check column or sampling expression is correct at table creation will be ignored. +- true — Check column or sampling expression is correct at table creation. +- false — Do not check column or sampling expression is correct at table creation. Default value: `true`. 
From f4da8c87bc40b9d2359cb537a64134956e3b7107 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 16 Jul 2021 11:22:03 +0300 Subject: [PATCH 27/29] Fix failed assertion in RocksDB in case of bad_alloc exception during batch write --- contrib/rocksdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/rocksdb b/contrib/rocksdb index 07c77549a20..6ff0adefdc8 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 07c77549a20b63ff6981b400085eba36bb5c80c4 +Subproject commit 6ff0adefdc84dac44e78804f7ca4122fe992cf8d From a489ac046e6ac70bb94ceadaa2743c7fa8c0e410 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 16 Jul 2021 14:14:43 +0300 Subject: [PATCH 28/29] Update version_date.tsv after release 21.6.8.62 --- utils/list-versions/version_date.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 28c2d7b1523..c8a8f3ac228 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,5 +1,6 @@ v21.7.3.14-stable 2021-07-13 v21.7.2.7-stable 2021-07-09 +v21.6.8.62-stable 2021-07-13 v21.6.7.57-stable 2021-07-09 v21.6.6.51-stable 2021-07-02 v21.6.5.37-stable 2021-06-19 From 70de8dbf743fe6c1a4cc03da182124814deac6ce Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 16 Jul 2021 14:39:56 +0300 Subject: [PATCH 29/29] Add integrity for loaded scripts in play.html --- programs/server/play.html | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/programs/server/play.html b/programs/server/play.html index 5e0377aa8f7..7b13807f2d9 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -613,10 +613,16 @@ } /// Huge JS libraries should be loaded only if needed. 
- function loadJS(src) { + function loadJS(src, integrity) { return new Promise((resolve, reject) => { const script = document.createElement('script'); script.src = src; + if (integrity) { + script.crossOrigin = 'anonymous'; + script.integrity = integrity; + } else { + console.warn('no integrity for', src) + } script.addEventListener('load', function() { resolve(true); }); document.head.appendChild(script); }); @@ -627,10 +633,14 @@ if (load_dagre_promise) { return load_dagre_promise; } load_dagre_promise = Promise.all([ - loadJS('https://dagrejs.github.io/project/dagre/v0.8.5/dagre.min.js'), - loadJS('https://dagrejs.github.io/project/graphlib-dot/v0.6.4/graphlib-dot.min.js'), - loadJS('https://dagrejs.github.io/project/dagre-d3/v0.6.4/dagre-d3.min.js'), - loadJS('https://cdn.jsdelivr.net/npm/d3@7.0.0'), + loadJS('https://dagrejs.github.io/project/dagre/v0.8.5/dagre.min.js', + 'sha384-2IH3T69EIKYC4c+RXZifZRvaH5SRUdacJW7j6HtE5rQbvLhKKdawxq6vpIzJ7j9M'), + loadJS('https://dagrejs.github.io/project/graphlib-dot/v0.6.4/graphlib-dot.min.js', + 'sha384-Q7oatU+b+y0oTkSoiRH9wTLH6sROySROCILZso/AbMMm9uKeq++r8ujD4l4f+CWj'), + loadJS('https://dagrejs.github.io/project/dagre-d3/v0.6.4/dagre-d3.min.js', + 'sha384-9N1ty7Yz7VKL3aJbOk+8ParYNW8G5W+MvxEfFL9G7CRYPmkHI9gJqyAfSI/8190W'), + loadJS('https://cdn.jsdelivr.net/npm/d3@7.0.0', + 'sha384-S+Kf0r6YzKIhKA8d1k2/xtYv+j0xYUU3E7+5YLrcPVab6hBh/r1J6cq90OXhw80u'), ]); return load_dagre_promise;